[r-cran-survey] 01/02: New upstream version 3.31-5
Andreas Tille
tille@debian.org
Sun Jan 8 15:53:50 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-survey.
commit 4376c36089fd4144bf63a484ba213fc6fe8a6112
Author: Andreas Tille <tille@debian.org>
Date: Sun Jan 8 16:51:52 2017 +0100
New upstream version 3.31-5
---
DESCRIPTION | 16 +
INDEX | 101 ++
MD5 | 214 ++++
NAMESPACE | 451 +++++++++
R/DBI.R | 404 ++++++++
R/anova.svyglm.R | 162 ++++
R/bootstrap.R | 103 ++
R/chisqsum.R | 110 +++
R/compressweights.R | 56 ++
R/confint.R | 32 +
R/count.R | 34 +
R/dbiupdate.R | 118 +++
R/ftable.svystat.R | 69 ++
R/grake.R | 429 ++++++++
R/greg.R | 223 +++++
R/ht.R | 35 +
R/loglin.R | 195 ++++
R/logrank.R | 212 ++++
R/margins.R | 186 ++++
R/mrb.R | 52 +
R/mse.R | 12 +
R/multistage.R | 821 ++++++++++++++++
R/multivariate.R | 155 +++
R/odbc.R | 220 +++++
R/olr.R | 338 +++++++
R/pFsum.R | 48 +
R/paley.R | 118 +++
R/pps.R | 485 ++++++++++
R/regtest.R | 291 ++++++
R/stdize.R | 24 +
R/stratsample.R | 13 +
R/survey.R | 2154 +++++++++++++++++++++++++++++++++++++++++
R/surveyby.R | 259 +++++
R/surveychisq.R | 444 +++++++++
R/surveygraph.R | 244 +++++
R/surveyrep.R | 2149 ++++++++++++++++++++++++++++++++++++++++
R/svycdf.R | 50 +
R/svyhist.R | 22 +
R/svykappa.R | 32 +
R/svykm.R | 381 ++++++++
R/svymi.R | 249 +++++
R/svypredmeans.R | 30 +
R/svyranktest.R | 107 ++
R/svysmooth.R | 143 +++
R/svyttest.R | 105 ++
R/sysdata.rda | Bin 0 -> 477 bytes
R/transform.R | 9 +
R/twophase.R | 772 +++++++++++++++
R/twophase2.R | 682 +++++++++++++
R/weightconstruction.R | 217 +++++
THANKS | 78 ++
TODO | 57 ++
build/vignette.rds | Bin 0 -> 353 bytes
data/api.rda | Bin 0 -> 227940 bytes
data/crowd.rda | Bin 0 -> 199 bytes
data/election.rda | Bin 0 -> 96440 bytes
data/fpc.rda | Bin 0 -> 365 bytes
data/hospital.rda | Bin 0 -> 441 bytes
data/mu284.rda | Bin 0 -> 329 bytes
data/nhanes.rda | Bin 0 -> 58568 bytes
data/scd.rda | Bin 0 -> 223 bytes
data/yrbs.rda | Bin 0 -> 22388 bytes
inst/BUGS | 10 +
inst/CITATION | 21 +
inst/COPYING | 5 +
inst/NEWS | 1144 ++++++++++++++++++++++
inst/api.db | Bin 0 -> 142336 bytes
inst/disclaimer | 8 +
inst/doc/domain.R | 67 ++
inst/doc/domain.Rnw | 111 +++
inst/doc/domain.pdf | Bin 0 -> 103112 bytes
inst/doc/epi.R | 135 +++
inst/doc/epi.Rnw | 250 +++++
inst/doc/epi.pdf | Bin 0 -> 134474 bytes
inst/doc/nwtco-subcohort.rda | Bin 0 -> 949 bytes
inst/doc/nwts.rda | Bin 0 -> 323 bytes
inst/doc/phase1.R | 58 ++
inst/doc/phase1.Rnw | 134 +++
inst/doc/phase1.pdf | Bin 0 -> 123670 bytes
inst/doc/pps.R | 53 +
inst/doc/pps.Rnw | 103 ++
inst/doc/pps.pdf | Bin 0 -> 169350 bytes
inst/doc/survey.R | 70 ++
inst/doc/survey.Rnw | 100 ++
inst/doc/survey.pdf | Bin 0 -> 85473 bytes
inst/porting.to.S | 4 +
inst/twostage.pdf | Bin 0 -> 74337 bytes
inst/ucla-examples.pdf | Bin 0 -> 96720 bytes
man/HR.Rd | 37 +
man/SE.Rd | 27 +
man/anova.svyglm.Rd | 99 ++
man/api.Rd | 132 +++
man/as.fpc.Rd | 35 +
man/as.svrepdesign.Rd | 100 ++
man/as.svydesign2.Rd | 33 +
man/barplot.svystat.Rd | 51 +
man/bootweights.Rd | 76 ++
man/brrweights.Rd | 125 +++
man/calibrate.Rd | 318 ++++++
man/compressWeights.Rd | 45 +
man/confint.svyglm.Rd | 52 +
man/crowd.Rd | 40 +
man/dimnames.DBIsvydesign.Rd | 54 ++
man/election.Rd | 72 ++
man/estweights.Rd | 86 ++
man/fpc.Rd | 72 ++
man/ftable.svystat.Rd | 57 ++
man/hadamard.Rd | 67 ++
man/hospital.Rd | 36 +
man/make.calfun.Rd | 65 ++
man/marginpred.Rd | 89 ++
man/mu284.Rd | 34 +
man/nhanes.Rd | 33 +
man/nonresponse.Rd | 95 ++
man/open.DBIsvydesign.Rd | 55 ++
man/paley.Rd | 72 ++
man/pchisqsum.Rd | 108 +++
man/postStratify.Rd | 105 ++
man/rake.Rd | 114 +++
man/regTermTest.Rd | 66 ++
man/scd.Rd | 62 ++
man/stratsample.Rd | 42 +
man/subset.survey.design.Rd | 48 +
man/surveyoptions.Rd | 82 ++
man/surveysummary.Rd | 221 +++++
man/svrVar.Rd | 35 +
man/svrepdesign.Rd | 182 ++++
man/svy.varcoef.Rd | 25 +
man/svyCprod.Rd | 100 ++
man/svyby.Rd | 161 +++
man/svycdf.Rd | 61 ++
man/svychisq.Rd | 181 ++++
man/svyciprop.Rd | 103 ++
man/svycontrast.Rd | 60 ++
man/svycoplot.Rd | 53 +
man/svycoxph.Rd | 117 +++
man/svydesign.Rd | 212 ++++
man/svyfactanal.Rd | 84 ++
man/svyglm.Rd | 157 +++
man/svyhist.Rd | 57 ++
man/svykappa.Rd | 35 +
man/svykm.Rd | 109 +++
man/svyloglin.Rd | 91 ++
man/svylogrank.Rd | 80 ++
man/svymle.Rd | 150 +++
man/svyolr.Rd | 53 +
man/svyplot.Rd | 102 ++
man/svyprcomp.Rd | 91 ++
man/svypredmeans.Rd | 57 ++
man/svyquantile.Rd | 168 ++++
man/svyranktest.Rd | 75 ++
man/svyratio.Rd | 131 +++
man/svyrecvar.Rd | 125 +++
man/svysmooth.Rd | 83 ++
man/svystandardize.Rd | 59 ++
man/svyttest.Rd | 38 +
man/trimWeights.Rd | 75 ++
man/twophase.Rd | 199 ++++
man/update.survey.design.Rd | 54 ++
man/weights.survey.design.Rd | 49 +
man/with.svyimputationList.Rd | 54 ++
man/withReplicates.Rd | 107 ++
man/yrbs.Rd | 46 +
tests/DBIcheck.R | 58 ++
tests/DBIcheck.Rout.save | 119 +++
tests/README | 54 ++
tests/api.R | 6 +
tests/api.Rout.save | 440 +++++++++
tests/badcal.R | 9 +
tests/badcal.Rout.save | 38 +
tests/bycovmat.R | 78 ++
tests/bycovmat.Rout.save | 106 ++
tests/caleg.R | 75 ++
tests/caleg.Rout.save | 207 ++++
tests/check.R | 47 +
tests/check.Rout.save | 160 +++
tests/deff.R | 29 +
tests/deff.Rout.save | 74 ++
tests/domain.R | 116 +++
tests/domain.Rout.save | 220 +++++
tests/fpc.R | 4 +
tests/fpc.Rout.save | 105 ++
tests/kalton.R | 48 +
tests/kalton.Rout.save | 128 +++
tests/lonely.psu.R | 81 ++
tests/lonely.psu.Rout.save | 265 +++++
tests/multistage.R | 6 +
tests/multistage.Rout.save | 49 +
tests/nwtco-subcohort.rda | Bin 0 -> 949 bytes
tests/nwts-cch.R | 31 +
tests/nwts-cch.Rout.save | 88 ++
tests/nwts.R | 39 +
tests/nwts.Rout.save | 202 ++++
tests/nwts.rda | Bin 0 -> 1421 bytes
tests/pps.R | 46 +
tests/pps.Rout.save | 128 +++
tests/quantile.R | 19 +
tests/quantile.Rout.save | 84 ++
tests/rakecheck.R | 55 ++
tests/rakecheck.Rout.save | 132 +++
tests/raowuboot.R | 4 +
tests/raowuboot.Rout.save | 32 +
tests/regpredict.R | 39 +
tests/regpredict.Rout.save | 73 ++
tests/scoping.R | 34 +
tests/scoping.Rout.save | 122 +++
tests/survcurve.R | 30 +
tests/survcurve.Rout.save | 96 ++
tests/twophase.R | 133 +++
tests/twophase.Rout.save | 271 ++++++
vignettes/domain.Rnw | 111 +++
vignettes/epi.Rnw | 250 +++++
vignettes/phase1.Rnw | 134 +++
vignettes/pps.Rnw | 103 ++
vignettes/survey.Rnw | 100 ++
215 files changed, 27586 insertions(+)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100755
index 0000000..99345e6
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,16 @@
+Package: survey
+Title: Analysis of Complex Survey Samples
+Description: Summary statistics, two-sample tests, rank tests, generalised linear models, cumulative link models, Cox models, loglinear models, and general maximum pseudolikelihood estimation for multistage stratified, cluster-sampled, unequally weighted survey samples. Variances by Taylor series linearisation or replicate weights. Post-stratification, calibration, and raking. Two-phase subsampling designs. Graphics. PPS sampling without replacement. Principal components, factor analysis.
+Version: 3.31-5
+Author: Thomas Lumley
+Maintainer: "Thomas Lumley" <t.lumley@auckland.ac.nz>
+License: GPL-2 | GPL-3
+Depends: R (>= 2.14.0), grid, methods, Matrix, survival
+Imports: stats, graphics, splines, lattice
+Suggests: foreign, MASS, KernSmooth, hexbin, mitools, RSQLite, RODBC,
+ quantreg, parallel, CompQuadForm, DBI
+URL: http://r-survey.r-forge.r-project.org/survey/
+NeedsCompilation: no
+Packaged: 2016-11-30 23:45:52 UTC; tlum005
+Repository: CRAN
+Date/Publication: 2016-12-01 21:19:54
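
As a quick illustration of the analysis workflow the DESCRIPTION above summarises, here is a minimal sketch using the api example data shipped in data/api.rda below. The dataset and variable names (apiclus1, dnum, pw, fpc, api00, enroll) follow the package's documented examples and are assumptions here, not part of this commit:

    library(survey)
    data(api)   # loads apiclus1, apistrat, apipop and friends
    ## one-stage cluster sample of California school districts, with fpc
    dclus1 <- svydesign(id = ~dnum, weights = ~pw, fpc = ~fpc, data = apiclus1)
    svymean(~api00, dclus1)    # design-based mean and SE of the 2000 API score
    svytotal(~enroll, dclus1)  # estimated total enrollment
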
diff --git a/INDEX b/INDEX
new file mode 100755
index 0000000..9746b4c
--- /dev/null
+++ b/INDEX
@@ -0,0 +1,101 @@
+svydesign Specify a survey design
+twophase Specify two-phase designs
+svrepdesign Specify replication-weight designs
+as.svrepdesign Compute replication weights for a design
+svydesign.imputationList Designs for multiply-imputed data
+subset.survey.design Subset of a survey design
+update.survey.design Add variables to a survey design
+postStratify Post-stratify a survey design
+rake Rake a survey design
+calibrate Calibration and generalised raking
+estWeights Estimated weights for two-phase designs
+trimWeights Trim sampling weights
+
+ -----
+
+svyplot Survey-weighted graphics
+svycoplot Conditioning plots for survey data
+svyhist Survey-weighted histogram
+svysmooth Smoothers and density estimates
+svycdf Cumulative distribution functions
+svykm Survival curves
+svyboxplot Survey-weighted boxplots
+svyby Tables of statistics
+ftable.svystat Formatted tables of statistics
+ftable.svrepstat
+ftable.svyby
+cv Extract coefficient of variation
+SE Extract standard error
+deff Extract design effect
+confint Extract confidence intervals
+svycontrast Linear combinations of estimates
+
+ -----
+
+svycoxph Survey-weighted Cox models
+svyglm Survey-weighted generalised linear models
+svyolr Ordinal logistic regression
+regTermTest Test sets of terms in a regression model
+svyloglin Loglinear models
+svyquantile Summary statistics for sample surveys
+svytotal
+svymean
+svyvar
+svyratio
+svytable Contingency tables
+svychisq Tests for two-way tables
+svyttest Design-based t-test
+svyranktest Design-based two-sample rank tests
+svylogrank Design-based logrank tests
+svystandardize Direct standardisation
+svykappa Cohen's kappa for inter-rater agreement
+svymle Maximum pseudolikelihood estimation in complex surveys
+svyprcomp Principal component analysis and biplots
+svyfactanal Weighted maximum likelihood factor analysis
+
+ -----
+
+withReplicates Replicate variances for arbitrary statistics
+
+ ----
+hadamard Hadamard matrices
+is.hadamard
+paley Paley-type Hadamard matrices
+ ----
+
+stratsample Take a stratified sample
+ ----
+
+Utility functions:
+make.calfun Define calibration metrics
+cal.linear
+cal.raking
+cal.logit
+*svy.varcoef Sandwich variance estimator for glms
+*svyCprod Computations for survey variances
+*svyrecvar Computations for multistage samples
+*svrVar Computations for replication variances
+jk1weights create replication weights
+jknweights create replication weights
+bootweights create replication weights
+brrweights create replication weights
+mrbweights create replication weights
+grake Computations for generalised raking
+*regcalibrate Computations for regression calibration
+degf Degrees of freedom for replication weights
+*twophasevar Variances in two-phase designs
+as.svydesign2 Convert an old svydesign object to
+ the new format.
+
+* not exported from NAMESPACE
+ -----
+Data:
+election US election data
+fpc Small artificial example
+scd Cardiac arrests example
+api California Academic Performance Index
+crowd Small example from VPLX manual
+mu284 Two-stage sample from MU284 population
+nhanes Subset of NHANES 2009-10
+yrbs One variable from Youth Risk Behavior Survey
+
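The INDEX above groups the design-adjustment entry points (postStratify, rake, calibrate) together. A minimal post-stratification sketch, reusing dclus1 from the earlier sketch; the school-type population counts are illustrative assumptions:

    pop.types <- data.frame(stype = c("E", "H", "M"), Freq = c(4421, 755, 1018))
    dclus1p <- postStratify(dclus1, ~stype, pop.types)
    svymean(~api00, dclus1p)   # typically smaller SEs than the raw design
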
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..39095b1
--- /dev/null
+++ b/MD5
@@ -0,0 +1,214 @@
+bb21735c11d2606deabd73b1e62df4cb *DESCRIPTION
+277c7af9a4124eedd9189928b8d964ea *INDEX
+175f0dc82260e98f5a49d644d1c0e2a9 *NAMESPACE
+45c49773b7c06193c416f570f41bf4e6 *R/DBI.R
+44549058d2737ce0bfb2b5486f83ff46 *R/anova.svyglm.R
+5b81c3cd21b951d535a27679230eeda4 *R/bootstrap.R
+4a27dd1029f20c1db823e474cecd6994 *R/chisqsum.R
+b5cffd73326ccfbef1954d2ad032efa0 *R/compressweights.R
+fd916fce7443a3771c3a965175efa324 *R/confint.R
+e4b7519f8c5cad157788d9ff0bebb9b5 *R/count.R
+c9e16bb78e9f9d0b8c5dacc4ba7c635c *R/dbiupdate.R
+c5f1dd01370e4c3d7f3d6d3558ca15ac *R/ftable.svystat.R
+9b5f259e3dbe48c2f9672342190d8928 *R/grake.R
+c3ae1151ba89b407fd5bcf9907d053fb *R/greg.R
+8abae944629ac267a54eb76119674a18 *R/ht.R
+7f99c61dc8bd8de3d1ac4d8c6695408d *R/loglin.R
+11d41fda4b83a1e193ac5af76958676b *R/logrank.R
+731acf9fc8cfb33bec878fed7128385d *R/margins.R
+460e2318fe0dde7157ad7fbba843eee0 *R/mrb.R
+9ca0b28566332e9e3d5236293fda88f5 *R/mse.R
+a097dede9cd97ad639c7f07f448eb466 *R/multistage.R
+7b6d54c119096397e472d504308cf749 *R/multivariate.R
+87f0927d8f16bf594a9e2a03413638df *R/odbc.R
+6fa343fbbc3e78131350ccaa6ad7478a *R/olr.R
+8169eb47021665ca2790739b12669641 *R/pFsum.R
+d7bde53f7898b0ed437962b894c9b798 *R/paley.R
+0c78cddb50c37c625b02ba83be836fd0 *R/pps.R
+6c35a4a344593247e9986be7c345d25b *R/regtest.R
+18f0183f59c24e9c46c0c9126a4c7472 *R/stdize.R
+14aabec32aa01b0ba0783771cd12a355 *R/stratsample.R
+e227c31eaca954643c08e8b34d8c6e32 *R/survey.R
+e867dc537df5a0597e28540ea441e156 *R/surveyby.R
+080da7f97232c658bdb864e2d2c4c74a *R/surveychisq.R
+61298ac4e7791c23beaf5745991a8c3f *R/surveygraph.R
+1ba10164959829d4103fc9cc574fad5f *R/surveyrep.R
+5bc03f84ff7bf31c74e055c9efe00678 *R/svycdf.R
+80cfb5f559a64507400fa4d2d1b46f1b *R/svyhist.R
+f167949077f73d87c969525ae5e11550 *R/svykappa.R
+2b4889035a00f98f1b27a9e3f76dd2fb *R/svykm.R
+dcf500be3b5b5432d57cba2e763a9a1c *R/svymi.R
+9c89fe232fc819518a6ececefcef7a38 *R/svypredmeans.R
+cb39b6c0471f473b887426b65410e61e *R/svyranktest.R
+67ee66fbd9f413a2b3b0d52d78016d4f *R/svysmooth.R
+c6d29ed0ad3f92a00eedaa2c9a1bc6ac *R/svyttest.R
+6f44084da3149913aa836a5e753e15e9 *R/sysdata.rda
+3a2f19a9a7b6e5f206a6f8a5537fb2d4 *R/transform.R
+55658d3ca0fc40e64428174e6182c07a *R/twophase.R
+769bfc83ea7282246d6dc395ceb2d600 *R/twophase2.R
+6c069bda823793258fa7630e6c8138ab *R/weightconstruction.R
+47d855e337347d66bd6a6c1b4c5ba89b *THANKS
+72b41af2715bc7e0eeacfccb34a1eab0 *TODO
+1ea8d38e469206bc76aa2f5ec9212b2b *build/vignette.rds
+e41b014b3601f7abe86cdc9f7d054733 *data/api.rda
+9aeb74c802ba05095da4c131e3bad280 *data/crowd.rda
+810c79d2d30b3312ba52ab78fb42e840 *data/election.rda
+a0ab4e6f1f4f3c57bcd5826b5a1c32fb *data/fpc.rda
+8adbb06d0c984265863f796ea961d08e *data/hospital.rda
+6ee5e4d8b21a53d5e041af72ba94aefd *data/mu284.rda
+f1ddb94254f3e1cc3500ad7807bc6603 *data/nhanes.rda
+c62d5c1c21c8acd86f9a1d14a5ce26e3 *data/scd.rda
+ee7c24b90192c2fee02439e60c672fee *data/yrbs.rda
+b31a81da0dd6e5adc582717575192368 *inst/BUGS
+5de896181e6740bd3617be55d60185e7 *inst/CITATION
+9dbfcdd76553194b1513751a02042f91 *inst/COPYING
+c68c3a22e8f5f110d8e0d94386799ce8 *inst/NEWS
+ad0ca60c6d7d1c64ed77618b40c5d1c5 *inst/api.db
+fedebdbde4e7cfe28b3941fc8371e857 *inst/disclaimer
+bd13d234657132da1a44e0a33bcfb456 *inst/doc/domain.R
+e549b9eba82a7a3db5543ec5be0b0bc6 *inst/doc/domain.Rnw
+9d8ecf9d71520cf38580f8efc0999693 *inst/doc/domain.pdf
+a5142360787a98ec409e10229bbd6f00 *inst/doc/epi.R
+cf470caff322c36517ae4ac286e420ee *inst/doc/epi.Rnw
+fe5f6b29c9ffd36bb53425445bc8af49 *inst/doc/epi.pdf
+d82d7a650f7a3ed2b846c6280fab38b5 *inst/doc/nwtco-subcohort.rda
+d102892ef1145ee2e54b13b668714651 *inst/doc/nwts.rda
+e473dab0d0bdae45394af33e262ce816 *inst/doc/phase1.R
+a3fc06bb909b1854d4bdb8f14bd0a5cd *inst/doc/phase1.Rnw
+81ed17de82af7ebbec63620560898807 *inst/doc/phase1.pdf
+d0e73408df9033a260a40660f32e1b02 *inst/doc/pps.R
+45d31ccd0e3bcb193a3cbce5c89824e7 *inst/doc/pps.Rnw
+3b2019f45921e18ea8d5dfc79fbfd5eb *inst/doc/pps.pdf
+da0b23328004b9044b0f62f9341945b9 *inst/doc/survey.R
+dfdd268e5e85cdb9422e11b2c183aa79 *inst/doc/survey.Rnw
+cf298bd436e47aa0cfb8215211d9d3da *inst/doc/survey.pdf
+67fd0dff07ced7069e2ac813865dbf58 *inst/porting.to.S
+0caeffbff77347fc2ab06f30534c3f46 *inst/twostage.pdf
+4db9100417c9160c9271bd853cd93f86 *inst/ucla-examples.pdf
+daf1910c21501f14a63aa2a7419c1466 *man/HR.Rd
+234787faa3310c2325952029685b5272 *man/SE.Rd
+052544ab43b19c0272ce22802116d230 *man/anova.svyglm.Rd
+80f214380630b6e1af6b9cec33d2e045 *man/api.Rd
+1df4e44a0124abadc68eeba8da160f7d *man/as.fpc.Rd
+f0477fbe848d323ffbd1aa84d636b6a7 *man/as.svrepdesign.Rd
+ca07d024bc8ad2920881be3a92416865 *man/as.svydesign2.Rd
+a2487bc8b605e35fa16fc160f7194055 *man/barplot.svystat.Rd
+c48b922c011640f2ab6585bf91f6d37f *man/bootweights.Rd
+b05701bb4ccac840031ff7f887bd55a7 *man/brrweights.Rd
+2fd0e73c03ab0b23ee9f1d23ca3ec8b2 *man/calibrate.Rd
+f98c7338d5ed2bf6930eaf66a5f0a1e7 *man/compressWeights.Rd
+a664c485c38842cd41c1458b4110101a *man/confint.svyglm.Rd
+b88cbcc5a0339e24aae8cac15abc6085 *man/crowd.Rd
+5e635a33c60f5b5ced5b6b2d3adbe088 *man/dimnames.DBIsvydesign.Rd
+55e032d442b0447687400e28e4085d29 *man/election.Rd
+3d46235191e8edd4839ac7ddddb272e8 *man/estweights.Rd
+4a89f9119309209a91debbaac354e0f2 *man/fpc.Rd
+1fc9004a0c1bb430f025861083e5b379 *man/ftable.svystat.Rd
+809a5e046151f44cdc4288915f1be7a5 *man/hadamard.Rd
+a9cb5d02e17af9460aa10aa8d1b6497a *man/hospital.Rd
+8564a70062d69df416337a057a7df340 *man/make.calfun.Rd
+493380df216b5c1b55bbdd05ec4cf22a *man/marginpred.Rd
+14e3927d8da68cd926652ec8462a16f9 *man/mu284.Rd
+29e8240a02389f11221722f8683bbff4 *man/nhanes.Rd
+8447ae42f78e376968421ee46543a195 *man/nonresponse.Rd
+e1f8b3f012cf614ebc7b5729200f4a0d *man/open.DBIsvydesign.Rd
+ab3aaf2c676494d82b0b440385ac2f05 *man/paley.Rd
+1ddc4aec65f1c8e28cca6e5e41cd2b6e *man/pchisqsum.Rd
+e8fa2c17aa1230065ae000fdb0913157 *man/postStratify.Rd
+6e7610099924ec8ebda3bfc6dcc40015 *man/rake.Rd
+11e1f9cf0d72ace91f9ea09b422056dd *man/regTermTest.Rd
+c38155f62ea15df2c750a0ca1f2cf5ff *man/scd.Rd
+db845dc8216a7d04e1c224b93f764d5f *man/stratsample.Rd
+e2d5e837e99c491b1c02c0d135516aa3 *man/subset.survey.design.Rd
+4d4f8685de34adf511eca14f1d25bcd1 *man/surveyoptions.Rd
+9970c91560e1c33898ba7bc2d4f71010 *man/surveysummary.Rd
+4c4993863452ec84eeb68d5ef07c7e6f *man/svrVar.Rd
+b959cd069e930a155c09c5f7bc32c0a4 *man/svrepdesign.Rd
+939d043da650a1f44502ba376d12ec3d *man/svy.varcoef.Rd
+ae19a003c9f60058fa4cb749f23e1c68 *man/svyCprod.Rd
+44100e27219878082cc8447e9ea523ac *man/svyby.Rd
+533fe820a1e613b0c8a395b49af7444b *man/svycdf.Rd
+0f088991a5dc31fb88970fd62c1b0dd4 *man/svychisq.Rd
+4be2574f626a95dc2dab3fa0bfd3e04e *man/svyciprop.Rd
+07c194c6877552919c3292edd6b92ea4 *man/svycontrast.Rd
+76f1c76aaf479969dab8528d4958ef74 *man/svycoplot.Rd
+a2660919025445d508679096d71fc740 *man/svycoxph.Rd
+22f65fcef51ec027097d328c4b41095e *man/svydesign.Rd
+ce556903b7ec044cdbfccceb75395dfc *man/svyfactanal.Rd
+82cd3c9151c1439825a141301af1789a *man/svyglm.Rd
+8bf0d8396e58c466da6ddadd2bf2f28f *man/svyhist.Rd
+38354ceab289e3056f6a3cb44f74c23b *man/svykappa.Rd
+342cc57dfb19aabc36854c557774739b *man/svykm.Rd
+930237e3c9e38a447e00b8390528a4af *man/svyloglin.Rd
+601083be411fdc19d1c5021de705245a *man/svylogrank.Rd
+e951bfb0758e87c8f86e1aba63a697c8 *man/svymle.Rd
+9222dce182bacbe70b3c72e9bccb0128 *man/svyolr.Rd
+cc87316618a887818b7b034c3d091969 *man/svyplot.Rd
+44aa5fc0f7128f7f84b32e8fa55f7bc3 *man/svyprcomp.Rd
+3be0b543b4f64d2c7033cceaf4672e97 *man/svypredmeans.Rd
+cd1a60b4e0228e6e2c8162d361756f73 *man/svyquantile.Rd
+0132b662eca171a0a5e547548a6829ea *man/svyranktest.Rd
+b5c15b59be245553e35425cf269f7031 *man/svyratio.Rd
+40e1f8bc28b3aacbb73efa5996b51f5f *man/svyrecvar.Rd
+671fb4c053614fe861700f3d5f4d804f *man/svysmooth.Rd
+f2789526be33e6b107178778ad9b9212 *man/svystandardize.Rd
+43ef70402c346de858bf8c497244b3c2 *man/svyttest.Rd
+74fc6fcfb5bc67e0178f6522fd33adfd *man/trimWeights.Rd
+91161a15e1718f9141c8e196dd99752f *man/twophase.Rd
+76f2f9f51b9ac247742fc0ee1b0b7f95 *man/update.survey.design.Rd
+20ee2b3e3a8d1848105f1cee6d57aeab *man/weights.survey.design.Rd
+10ba32bc8528e4861b29c0044b20e802 *man/with.svyimputationList.Rd
+61dc769cecbd6460f91b7a77401c9c75 *man/withReplicates.Rd
+f01a3d553c7b9afd65e616b196917f3c *man/yrbs.Rd
+c91c9d0dd78c264236d5aeadbe6de4aa *tests/DBIcheck.R
+a36403b415fc95b10256ad06ac94add6 *tests/DBIcheck.Rout.save
+dc2a6feb6e6e21f3fef6be3c2ee3bdcb *tests/README
+527e5ba782684ca6a459d09a52e4bb4b *tests/api.R
+3ad53ef34903baaea1aabc5d8f4cd0f9 *tests/api.Rout.save
+eba420580d5712b2bdbee74f844e2c9e *tests/badcal.R
+fca32d6127f578fe7cacf6957c9bcb06 *tests/badcal.Rout.save
+0ece37ceda0022b032153d4496a3f943 *tests/bycovmat.R
+0d4380b36115f8543b2476ddac923676 *tests/bycovmat.Rout.save
+24dd5b1d7fc929b24f0927ee85b8d0ef *tests/caleg.R
+2276d7c59d2592aa48a718674003620f *tests/caleg.Rout.save
+bb39e18b97ab5f91094f9ceb71d992f5 *tests/check.R
+d4e620440333904b7e057ee26984fe12 *tests/check.Rout.save
+9afd1c4ef859ba69520afe4825a3b1e2 *tests/deff.R
+1c28ba9df80a9d65aee6d49bb767148d *tests/deff.Rout.save
+430cd5207b02ef92d2d5a75e1bdf9074 *tests/domain.R
+62c918ff711bd24cfa3d860df5556513 *tests/domain.Rout.save
+85b0201ab5b022cf0e51b6badfc5c07b *tests/fpc.R
+a9f2ac6a2481cb8100a634703fe5c0d1 *tests/fpc.Rout.save
+32174d2a986af7276bdb7daa62670c77 *tests/kalton.R
+9b30594ac4527d0ece5bab043f6c06f1 *tests/kalton.Rout.save
+3ab5b5b8d2fa053b62832df2585db7ee *tests/lonely.psu.R
+0add96a76dd1182564771813281109f8 *tests/lonely.psu.Rout.save
+5c88d425cfb80a6b350763acff6c2b92 *tests/multistage.R
+188777251024d61793ab0fb393cc175c *tests/multistage.Rout.save
+d82d7a650f7a3ed2b846c6280fab38b5 *tests/nwtco-subcohort.rda
+9f37aa87c54f9e9585f6ed839da38a75 *tests/nwts-cch.R
+18298f29aee3995d9e480d5058a09ca1 *tests/nwts-cch.Rout.save
+e7866681f0a2aa1487345e96e77fe49a *tests/nwts.R
+bd1e49943f467fe7b2591764fd66fe4e *tests/nwts.Rout.save
+f78a9ca2f7d82b635a9ac83c84f22721 *tests/nwts.rda
+1f2c987cef798c0d6c8635a5be720c24 *tests/pps.R
+73e8268a284580d4d32ff928262a88c8 *tests/pps.Rout.save
+d5a275559b33371fc701d467a1e1b8ba *tests/quantile.R
+88b3bae4b81c7b895bbea52c14b4a9c4 *tests/quantile.Rout.save
+bb6b47311c28af44e4386146bbb766cd *tests/rakecheck.R
+227b1e5214e1f36393633ed5b926a154 *tests/rakecheck.Rout.save
+23d3f3e5924ebac663762223b9073dc5 *tests/raowuboot.R
+597d97f91f48834e3567ce60d55a56a3 *tests/raowuboot.Rout.save
+73d743b02cde34dea66b7a5a1e358a69 *tests/regpredict.R
+17f986ce4a44a05026529890440f7e9e *tests/regpredict.Rout.save
+08e7b85dd0996a1afa68e8dfcb698a4d *tests/scoping.R
+c21c1abba50dae13b4eb2d4738f20461 *tests/scoping.Rout.save
+ce54c4edad7a3b9fd9c8aa995473a794 *tests/survcurve.R
+3cb8a60c150867bbd537a9c0a2493f6d *tests/survcurve.Rout.save
+ebd38893f6dfe5810cc67e87373c2440 *tests/twophase.R
+96a10516fa9b9a7cd6548c1845eca0f7 *tests/twophase.Rout.save
+e549b9eba82a7a3db5543ec5be0b0bc6 *vignettes/domain.Rnw
+cf470caff322c36517ae4ac286e420ee *vignettes/epi.Rnw
+a3fc06bb909b1854d4bdb8f14bd0a5cd *vignettes/phase1.Rnw
+45d31ccd0e3bcb193a3cbce5c89824e7 *vignettes/pps.Rnw
+dfdd268e5e85cdb9422e11b2c183aa79 *vignettes/survey.Rnw
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..22af8f3
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,451 @@
+import(stats,graphics,splines,grid, survival,lattice,methods,Matrix)
+importFrom("grDevices", "col2rgb", "grey", "rgb")
+importFrom("utils", "getS3method")
+export(svydesign, svrepdesign, as.svrepdesign,twophase, postStratify,
+ rake, calibrate, estWeights, as.svydesign2, svyplot, svyhist,
+ svyboxplot, svyby, cv, SE, deff, svycoxph, svyglm, svyquantile,
+ svymean, svytotal, svyvar, svyratio, svytable, svychisq, svymle, svysmooth,
+ withReplicates, hadamard, paley, is.hadamard, bootweights, mrbweights,
+ jk1weights,jknweights, brrweights, regTermTest, degf, nonresponse,
+ sparseCells, neighbours, joinCells, compressWeights, make.formula,
+ svycontrast, svyCprod, svyrecvar, grake, svrVar, pchisqsum, pFsum,
+ make.calfun, cal.linear, cal.raking, cal.logit, svycdf, svykappa, svykm,
+ svyolr, svycoplot, svyloglin, make.panel.svysmooth, dotchart, subbootweights,
+ svyciprop, svyttest, marginpred, twophasevar, twophase2var, ppsmat, HR, unwtd.count,
+ svyfactanal, svypredmeans, svyprcomp, trimWeights,stratsample,svylogrank,svyranktest,svystandardize)
+
+S3method(svyloglin,survey.design)
+S3method(svyloglin,svyrep.design)
+S3method(svyloglin,DBIsvydesign)
+S3method(svyloglin,ODBCsvydesign)
+S3method(anova,svyloglin)
+S3method(print,svyloglin)
+S3method(print,anova.svyloglin)
+S3method(coef,svyloglin)
+S3method(vcov,svyloglin)
+S3method(deviance,svyloglin)
+S3method(print,summary.svyloglin)
+S3method(summary,svyloglin)
+S3method(update,svyloglin)
+S3method(model.matrix,svyloglin)
+S3method(terms,svyloglin)
+S3method(degf,svyloglin)
+S3method(svyttest,default)
+S3method(biplot,svyprcomp)
+
+S3method(marginpred,svycoxph)
+S3method(marginpred, svykmlist)
+S3method(marginpred, svyglm)
+
+S3method(print,svyciprop)
+S3method(vcov,svyciprop)
+S3method(confint,svyciprop)
+S3method(coef,svyciprop)
+
+
+S3method(svydesign, default)
+S3method(svydesign, imputationList)
+S3method(svydesign, character)
+S3method(svydesign, DBimputationList)
+
+S3method(close, DBIsvydesign)
+S3method(open, DBIsvydesign)
+S3method(summary, DBIsvydesign)
+S3method(print, summary.DBIsvydesign)
+S3method(print, DBIsvydesign)
+
+S3method(close, ODBCsvydesign)
+S3method(open, ODBCsvydesign)
+S3method(summary, ODBCsvydesign)
+S3method(print, summary.ODBCsvydesign)
+S3method(print, ODBCsvydesign)
+S3method(print, pps)
+S3method(summary, pps)
+S3method(print, summary.pps)
+
+S3method(svrepdesign, default)
+S3method(svrepdesign, imputationList)
+S3method(svrepdesign, character)
+
+S3method(print, DBIrepdesign)
+S3method(print, ODBCrepdesign)
+S3method(summary, DBIrepdesign)
+S3method(summary, ODBCrepdesign)
+
+S3method(svymean, survey.design)
+S3method(svymean, svyrep.design)
+S3method(svymean, survey.design2)
+S3method(svymean, twophase)
+S3method(svymean, twophase2)
+S3method(svymean, pps)
+S3method(svymean, DBIsvydesign)
+S3method(svymean, ODBCsvydesign)
+
+S3method(svytotal, survey.design)
+S3method(svytotal, svyrep.design)
+S3method(svytotal, survey.design2)
+S3method(svytotal, twophase)
+S3method(svytotal, twophase2)
+S3method(svytotal, pps)
+S3method(svytotal, DBIsvydesign)
+S3method(svytotal, ODBCsvydesign)
+
+S3method(svyratio, survey.design)
+S3method(svyratio, svyrep.design)
+S3method(svyratio, survey.design2)
+S3method(svyratio, pps)
+S3method(svyratio, twophase)
+S3method(svyratio, twophase2)
+S3method(svyratio, DBIsvydesign)
+S3method(svyratio, ODBCsvydesign)
+
+S3method(svyvar, survey.design)
+S3method(svyvar, svyrep.design)
+S3method(svyvar, DBIsvydesign)
+S3method(svyvar, ODBCsvydesign)
+
+S3method(svyquantile, survey.design)
+S3method(svyquantile, svyrep.design)
+S3method(svyquantile, DBIsvydesign)
+S3method(svyquantile, ODBCsvydesign)
+
+S3method(svytable, survey.design)
+S3method(svytable, svyrep.design)
+S3method(svytable, DBIsvydesign)
+S3method(svytable, ODBCsvydesign)
+
+S3method(svychisq, survey.design)
+S3method(svychisq, svyrep.design)
+S3method(svychisq, DBIsvydesign)
+S3method(svychisq, ODBCsvydesign)
+S3method(svychisq, twophase)
+
+S3method(svykappa, default)
+S3method(svykappa, DBIsvydesign)
+S3method(svykappa, ODBCsvydesign)
+
+S3method(svyglm,survey.design)
+S3method(svyglm,svyrep.design)
+S3method(svyglm, DBIsvydesign)
+S3method(svyglm, ODBCsvydesign)
+
+S3method(svyplot,default)
+S3method(svyplot, DBIsvydesign)
+S3method(svyplot, ODBCsvydesign)
+S3method(svycoplot,default)
+S3method(svycoplot, DBIsvydesign)
+S3method(svycoplot, ODBCsvydesign)
+S3method(svysmooth, default)
+S3method(svysmooth, DBIsvydesign)
+S3method(svyboxplot, DBIsvydesign)
+S3method(svysmooth, ODBCsvydesign)
+S3method(svyboxplot, ODBCsvydesign)
+S3method(svyboxplot, default)
+S3method(svycdf, default)
+S3method(svycdf, DBIsvydesign)
+S3method(svycdf, ODBCsvydesign)
+
+S3method(plot, svystat)
+S3method(plot, svrepstat)
+S3method(barplot, svystat)
+S3method(barplot, svrepstat)
+S3method(plot, svyby)
+S3method(barplot, svyby)
+
+S3method(dotchart, svystat)
+S3method(dotchart, svrepstat)
+S3method(dotchart, svyby)
+S3method(dotchart, default)
+
+S3method(predict, svyglm)
+S3method(predict, svrepglm)
+S3method(predict, svyratio)
+S3method(predict, svyratio_separate)
+S3method(coef,svyratio)
+
+S3method(svycoxph,survey.design)
+S3method(svycoxph,svyrep.design)
+S3method(svycoxph, DBIsvydesign)
+S3method(svycoxph, ODBCsvydesign)
+
+S3method(residuals, svrepglm)
+S3method(residuals, svyglm)
+
+S3method(coef,svrepstat)
+S3method(coef,svystat)
+S3method(coef,svyby)
+S3method(coef,svymle)
+S3method(coef, svyolr)
+S3method(coef, svyglm)
+S3method(coef, svyquantile)
+
+S3method(confint,svyglm)
+S3method(confint, svyciprop)
+S3method(confint, svyquantile)
+
+S3method(SE,default)
+S3method(SE,svrepstat)
+S3method(SE,svystat)
+S3method(SE,svyby)
+S3method(SE,svyquantile)
+S3method(SE,svyratio)
+
+S3method(vcov, svyquantile)
+S3method(vcov, svrepstat)
+S3method(vcov, svyglm)
+S3method(vcov, svymle)
+S3method(vcov, svystat)
+S3method(vcov, svyby)
+S3method(vcov, svyratio)
+
+S3method(logLik,svyglm)
+S3method(logLik,svrepglm)
+
+S3method(extractAIC,svyglm)
+S3method(extractAIC,svrepglm)
+S3method(anova,svyglm)
+S3method(AIC,svyglm)
+S3method(BIC,svyglm)
+
+S3method(anova,svycoxph)
+S3method(predict,svycoxph)
+
+S3method(svycontrast,svrepstat)
+S3method(svycontrast,svystat)
+S3method(svycontrast,svyby)
+S3method(svycontrast,svyglm)
+S3method(svycontrast,svycoxph)
+S3method(svycontrast,default)
+
+S3method(image,svyrep.design)
+S3method(image,pps)
+S3method(plot,svysmooth)
+S3method(print,svysmooth)
+S3method(lines,svysmooth)
+
+S3method(svykm, survey.design)
+S3method(svykm, svyrep.design)
+S3method(svykm, DBIsvydesign)
+S3method(svykm, ODBCsvydesign)
+S3method(plot,svykm)
+S3method(print,svykm)
+S3method(print,svykmlist)
+S3method(lines, svykm)
+S3method(plot,svykmlist)
+S3method(quantile, svykm)
+S3method(confint,svykm)
+
+S3method(svyolr,survey.design2)
+S3method(svyolr,svyrep.design)
+S3method(svyolr,DBIsvydesign)
+S3method(svyolr,ODBCsvydesign)
+S3method(vcov,svyolr)
+S3method(summary, svyolr)
+S3method(print, svyolr)
+S3method(print, summary.svyolr)
+
+S3method(ftable,svrepstat)
+S3method(ftable,svystat)
+S3method(ftable,svyby)
+
+
+S3method(weights, nonresponse)
+S3method(weights, survey.design)
+S3method(weights, survey_fpc)
+S3method(weights, svyrep.design)
+
+S3method(summary,survey.design)
+S3method(summary,survey.design2)
+S3method(summary,twophase)
+S3method(summary, twophase2)
+S3method(summary,svyrep.design)
+S3method(summary,svrepglm)
+S3method(summary,svreptable)
+S3method(summary,svycoxph)
+S3method(summary,svyglm)
+S3method(summary,svymle)
+S3method(summary,svytable)
+
+S3method(print,summary.survey.design)
+S3method(print,summary.survey.design2)
+S3method(print,summary.twophase)
+S3method(print,summary.twophase2)
+S3method(print,summary.svyrep.design)
+S3method(print,summary.svyglm)
+S3method(print,summary.svytable)
+S3method(print,svycdf)
+S3method(plot,svycdf)
+S3method(print, nonresponse)
+S3method(print, nonresponseSubset)
+S3method(print, regTermTest)
+S3method(print, regTermTestLRT)
+S3method(print, svrepstat)
+S3method(print, svystat)
+S3method(print, survey.design)
+S3method(print, survey.design2)
+S3method(print, svyrep.design)
+S3method(print, svyglm)
+S3method(print, svymle)
+S3method(print, svyquantile)
+S3method(print, svyratio)
+S3method(print, svyratio_separate)
+S3method(print, twophase)
+S3method(print, twophase2)
+S3method(print, calfun)
+S3method(print, svyvar)
+S3method(print, seqanova.svyglm)
+S3method(as.matrix, svyvar)
+S3method(print, svrepvar)
+S3method(as.matrix, svrepvar)
+
+S3method(withReplicates, svyrep.design)
+S3method(withReplicates, svrepvar)
+S3method(withReplicates, svrepstat)
+
+S3method(dim,repweights_compressed)
+S3method(dim,survey.design)
+S3method(dim,twophase)
+S3method(dim,twophase2)
+S3method(dim, DBIsvydesign)
+S3method(dim, DBIrepdesign)
+S3method(dim, ODBCsvydesign)
+S3method(dim, svyrep.design)
+S3method(dim, svyimputationList)
+
+S3method(dimnames,survey.design)
+S3method(dimnames,svyrep.design)
+S3method(dimnames,twophase)
+S3method(dimnames, DBIsvydesign)
+S3method(dimnames, DBIrepdesign)
+S3method(dimnames, ODBCsvydesign)
+S3method(dimnames, svyimputationList)
+
+S3method(dimnames, repweights_compressed)
+
+S3method(degf, survey.design2)
+S3method(degf, svyrep.design)
+S3method(degf, pps)
+S3method(degf, twophase)
+S3method(degf, twophase2)
+
+S3method(cv,default)
+S3method(cv,svyratio)
+
+S3method(deff,default)
+S3method(deff,svyby)
+
+S3method(postStratify,survey.design)
+S3method(postStratify,svyrep.design)
+S3method(postStratify,twophase)
+S3method(postStratify,twophase2)
+S3method(postStratify,DBIsvydesign)
+S3method(postStratify,ODBCsvydesign)
+
+S3method(calibrate,survey.design2)
+S3method(calibrate,svyrep.design)
+S3method(calibrate,twophase)
+S3method(calibrate,twophase2)
+S3method(calibrate, DBIsvydesign)
+S3method(calibrate, ODBCsvydesign)
+
+S3method(estWeights,data.frame)
+S3method(estWeights,twophase)
+
+S3method(compressWeights, default)
+S3method(compressWeights, repweights_compressed)
+S3method(compressWeights, svyrep.design)
+S3method(trimWeights, svyrep.design)
+S3method(trimWeights, survey.design2)
+
+S3method(subset,survey.design)
+S3method(subset,svyrep.design)
+S3method(subset,twophase)
+S3method(subset,twophase2)
+S3method(subset, DBIsvydesign)
+S3method(subset, ODBCsvydesign)
+
+S3method(update,survey.design)
+S3method(update,svyrep.design)
+S3method(update,twophase)
+S3method(update,twophase2)
+S3method(update,DBIsvydesign)
+S3method(update,ODBCsvydesign)
+S3method(update,svyimputationList)
+S3method(transform,survey.design)
+S3method(transform,svyrep.design)
+S3method(transform,twophase)
+S3method(transform,twophase2)
+S3method(transform,DBIsvydesign)
+S3method(transform,ODBCsvydesign)
+S3method(transform,svyimputationList)
+
+
+S3method(svyby, default)
+S3method(svyby, DBIsvydesign)
+S3method(svyby, ODBCsvydesign)
+
+S3method(regcalibrate, survey.design2)
+S3method(regcalibrate, svyrep.design)
+
+S3method(na.exclude,survey.design)
+S3method(na.exclude,twophase)
+S3method(na.fail,survey.design)
+S3method(na.fail,twophase)
+S3method(na.omit,survey.design)
+S3method(na.omit,twophase)
+
+S3method(model.frame,survey.design)
+S3method(model.frame,svycoxph)
+S3method(model.frame,svyrep.design)
+S3method(model.frame, twophase)
+S3method(model.frame, twophase2)
+S3method(model.frame, svyolr)
+
+S3method(as.matrix,repweights)
+S3method(as.matrix, repweights_compressed)
+S3method(as.vector, repweights_compressed)
+
+S3method(as.data.frame,svrepstat)
+S3method(as.data.frame,svystat)
+
+S3method(with,svyimputationList)
+S3method(print, svyimputationList)
+S3method(subset, svyimputationList)
+S3method(print, svyDBimputationList)
+S3method(with, svyDBimputationList)
+S3method(update, svyDBimputationList)
+S3method(close, svyDBimputationList)
+S3method(open, svyDBimputationList)
+S3method(subset, svyDBimputationList)
+
+S3method(confint,svystat)
+S3method(confint,svrepstat)
+S3method(confint, svyratio)
+S3method(confint, svyby)
+
+S3method(`[`, survey.design)
+S3method(`[`, survey.design2)
+S3method(`[`, svyrep.design)
+S3method(`[`, twophase)
+S3method(`[`, twophase2)
+S3method(`[`, repweights_compressed)
+S3method(`[`, nonresponse)
+S3method(`[`, DBIsvydesign)
+S3method(`[`, DBIrepdesign)
+S3method(`[`, ODBCsvydesign)
+
+
+S3method(`[<-`, survey.design)
+
+
+S3method(svylogrank,survey.design2)
+S3method(svylogrank,svyrep.design)
+S3method(svylogrank,twophase)
+S3method(svylogrank,DBIsvydesign)
+S3method(svylogrank,ODBCsvydesign)
+
+S3method(svyranktest,survey.design)
+S3method(svyranktest,svyrep.design)
+S3method(svyranktest,DBIsvydesign)
+S3method(svyranktest,ODBCsvydesign)
diff --git a/R/DBI.R b/R/DBI.R
new file mode 100644
index 0000000..574c5c8
--- /dev/null
+++ b/R/DBI.R
@@ -0,0 +1,404 @@
+
+svydesign.character<-function (ids, probs = NULL, strata = NULL, variables = NULL,
+ fpc = NULL, data, nest = FALSE, check.strata = !nest,
+ weights = NULL,pps=FALSE,
+ dbtype="SQLite", dbname,
+ ...)
+{
+
+ if (dbtype == "ODBC"){
+ if (dbname=="")
+ dbconn<-RODBC::odbcDriverConnect(dbname,...)
+ else
+ dbconn<-RODBC::odbcConnect(dbname,...)
+ } else {
+ db<-DBI::dbDriver(dbtype)
+ dbconn<- DBI::dbConnect(db, dbname,...)
+ }
+ design.vars<-c(all.vars(ids), all.vars(probs), all.vars(strata),
+ all.vars(fpc), all.vars(weights))
+ design.query<-paste("select", paste(design.vars,collapse=","), "from", data)
+ if (dbtype=="ODBC")
+ design.data<-RODBC::sqlQuery(dbconn, design.query)
+ else
+ design.data<-DBI::dbGetQuery(dbconn, design.query)
+
+ rval<-svydesign(ids=ids, probs=probs, strata=strata, data=design.data,
+ fpc=fpc, variables=variables, nest=nest,check.strata=check.strata,
+ weights=weights)
+ rval$db<-list(dbname=dbname, tablename=data, connection=dbconn, dbtype=dbtype)
+ rval$variables<-NULL
+ rval$call<-sys.call(-1)
+ if (dbtype=="ODBC")
+ class(rval)<-c("ODBCsvydesign",class(rval))
+ else
+ class(rval)<-c("DBIsvydesign",class(rval))
+ rval
+}
+
+print.DBIsvydesign<-function(x,...){
+ cat("DB-backed ")
+ NextMethod()
+ if (!checkConnection(x$db$connection, error=FALSE))
+ cat("<DBI Connection closed>\n")
+ invisible(x)
+}
+
+summary.DBIsvydesign<-function(object,...){
+ class(object)<-c("summary.DBIsvydesign",class(object))
+ object
+}
+
+print.summary.DBIsvydesign<-function(x,...){
+ print.survey.design2(x,varnames=TRUE,design.summaries=TRUE,...)
+ invisible(x)
+}
+
+close.DBIsvydesign<-function(con,...){
+ DBI::dbDisconnect(con$db$connection,...)
+ invisible(con)
+}
+
+open.DBIsvydesign<-function(con,...){
+ db<-DBI::dbDriver(con$db$dbtype)
+ con$db$connection<-DBI::dbConnect(db, dbname=con$db$dbname,...)
+ con
+}
+
+svymean.DBIsvydesign<-function(x, design,...){
+ design$variables<-getvars(x, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svymean",design)
+}
+
+
+svytotal.DBIsvydesign<-function(x, design,na.rm=FALSE,...){
+ design$variables<-getvars(x, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svytotal",design)
+}
+
+svyquantile.DBIsvydesign<-function(x, design,quantiles,...){
+ design$variables<-getvars(x, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svyquantile",design)
+}
+
+
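+## If every observation in a factor's reference level has been dropped
+## (weight zero), relevel to the first level that still has observations,
+## so the model matrix built from the remaining rows stays full rank.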
+dropFactor<-function(mf, w){
+ if(!any(w==0)) return(mf)
+ dropped<-w==0
+ for(i in 1:ncol(mf)) {
+ if (is.factor(mf[[i]])){
+ fi<-mf[[i]]
+ if (all(dropped[fi==levels(fi)[1]])){
+ tt<-table(fi[!dropped])
+ l<-min(which(tt>0))
+ levs<-levels(fi)
+ mf[[i]]<-relevel(mf[[i]],ref=levs[l])
+ }
+ }
+ }
+ mf
+}
+
+svyglm.DBIsvydesign<-function(formula, design,...){
+ design$variables<-dropFactor(getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset),
+ weights(design))
+ NextMethod("svyglm",design)
+}
+
+
+
+svyplot.DBIsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ design$variables[weights(design)==0,]<-NA
+ NextMethod("svyplot",design)
+}
+
+
+svycoplot.DBIsvydesign<-function(formula,design, style=c("hexbin","transparent"),
+ basecol="black",alpha=c(0,0.8),hexscale=c("relative","absolute"),...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename, updates=design$updates, subset=design$subset)
+ design$variables[weights(design)==0,]<-NA
+ NextMethod("svycoplot",design)
+}
+
+svyboxplot.DBIsvydesign<-function(formula,design, ...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ design$variables[weights(design)==0,]<-NA
+ class(design)<-setdiff(class(design),"DBIsvydesign")
+ svyboxplot(formula,design,...)
+}
+
+
+svycdf.DBIsvydesign<-function(formula,design, na.rm=TRUE, ...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svycdf",design)
+
+}
+
+svyolr.DBIsvydesign<-function(formula,design,...){
+ design$variables<-dropFactor(getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset),
+ weights(design))
+ NextMethod("svyolr",design)
+}
+
+svycoxph.DBIsvydesign<-function(formula,design,...){
+ design$variables<-dropFactor(getvars(formula, design$db$connection, design$db$tablename,updates=design$updates),
+ weights(design))
+ NextMethod("svycoxph",design)
+}
+
+svyvar.DBIsvydesign<-function(x,design,na.rm=FALSE,...){
+ design$variables<-getvars(x, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svyvar",design)
+}
+
+
+
+svykm.DBIsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svykm",design)
+}
+
+
+svykappa.DBIsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svykappa",design)
+}
+
+
+svysmooth.DBIsvydesign<-function(formula,design,method=c("locpoly","quantreg"),bandwidth,quantile,df,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svysmooth",design)
+}
+
+
+svychisq.DBIsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svychisq",design)
+}
+
+svyranktest.DBIsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svyranktest",design)
+}
+
+svyratio.DBIsvydesign<-function(numerator, denominator, design,...){
+ design$variables<-cbind(getvars(numerator,design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset),
+ getvars(denominator,design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset))
+ NextMethod("svyratio",design)
+
+}
+
+
+svyby.DBIsvydesign<-function(formula, by, design,...){
+ design$variables<-cbind(getvars(formula,design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset),
+ getvars(by,design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset))
+ class(design)<-setdiff(class(design),"DBIsvydesign")
+ svyby(formula,by,design,...)
+}
+
+svytable.DBIsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("svytable",design)
+}
+
+
+calibrate.DBIsvydesign<-function(design,formula,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates, subset=design$subset)
+ NextMethod("calibrate",design)
+}
+postStratify.DBIsvydesign<-function(design, strata, population, partial = FALSE, ...) .NotYetImplemented()
+
+
+
+
+subset.DBIsvydesign<-function (x, subset, ...)
+{
+ e <- substitute(subset)
+ x$variables<-getvars(make.formula(all.vars(e)), x$db$connection, x$db$tablename,updates=x$updates, subset=x$subset)
+ r <- eval(e, x$variables, parent.frame())
+ r <- r & !is.na(r)
+ x <- x[r, ]
+ x$call <- sys.call(-1)
+ x
+}
+
+
+
+
+
+dim.DBIsvydesign<-function(x){
+ w<-weights(x)
+ nrow<-sum(w!=0)
+ coln<-names(DBI::dbGetQuery(x$db$conn, paste("select * from", x$db$tablename, "limit 1")))
+ if (!is.null(x$updates)){
+ update.names<-do.call(c, lapply(x$updates, names))
+ ncol<-length(unique(c(coln,update.names)))
+ } else ncol<-length(coln)
+ c(nrow,ncol)
+}
+
+
+dim.DBIrepdesign<-function(x){
+ if (is.null(x$subset))
+ nrow <-nrow(x$repweights)
+ else
+ nrow<-length(x$subset)
+ coln<-names(DBI::dbGetQuery(x$db$conn, paste("select * from", x$db$tablename, "limit 1")))
+ if (!is.null(x$updates)){
+ update.names<-do.call(c, lapply(x$updates, names))
+ ncol<-length(unique(c(coln,update.names)))
+ } else ncol<-length(coln)
+ c(nrow,ncol)
+}
+
+dimnames.DBIsvydesign<-function(x){
+ rown<-rownames(x$cluster)[weights(x)!=0]
+ coln<-names(DBI::dbGetQuery(x$db$conn, paste("select * from", x$db$tablename, "limit 1")))
+ if (!is.null(x$updates)){
+ update.names<-do.call(c, lapply(x$updates, names))
+ coln<-unique(c(coln,update.names))
+ }
+ list(rown,coln)
+}
+
+
+dimnames.DBIrepdesign<-function(x){
+ if (is.null(x$subset))
+ rown<-rownames(x$cluster)
+ else
+ rown<-rownames(x$cluster)[x$subset]
+ coln<-names(DBI::dbGetQuery(x$db$conn, paste("select * from", x$db$tablename, "limit 1")))
+ if (!is.null(x$updates)){
+ update.names<-do.call(c, lapply(x$updates, names))
+ coln<-unique(c(coln,update.names))
+ }
+ list(rown,coln)
+}
+
+"[.DBIsvydesign"<-function (x, i, ..., drop = TRUE)
+{
+ if (!missing(i)) {
+ if (is.logical(i))
+ x$prob[!i] <- Inf
+ else if (is.numeric(i) && length(i))
+ x$prob[-i] <- Inf
+ else {
+ tmp <- x$prob[i, ]
+ x$prob <- rep(Inf, length(x$prob))
+ x$prob[i, ] <- tmp
+ }
+ index <- is.finite(x$prob)
+ psu <- !duplicated(x$cluster[index, 1])
+ tt <- table(x$strata[index, 1][psu])
+ if (any(tt == 1)) {
+ warning(sum(tt == 1), " strata have only one PSU in this subset.")
+ }
+
+ }
+ else {
+ if (!is.null(x$variables))
+ x$variables <- x$variables[, ..1, drop = FALSE]
+ }
+ x
+}
+
+
+"[.DBIrepdesign"<-function (x, i, j, drop = FALSE)
+{
+ if (!missing(i)) {
+ pwt <- x$pweights
+ if (is.data.frame(pwt))
+ pwt <- pwt[[1]]
+ x$pweights <- pwt[i]
+ x$repweights <- x$repweights[i, , drop = FALSE]
+ if (!is.null(x$selfrep))
+ x$selfrep <- x$selfrep[i]
+ if (is.null(x$subset))
+ x$subset<-(1:nrow(x$variables))[i]
+ else
+ x$subset<-x$subset[i]
+ if (!missing(j))
+ x$variables <- x$variables[i, j, drop = FALSE]
+ else x$variables <- x$variables[i, , drop = FALSE]
+ x$degf <- NULL
+ x$degf <- degf(x)
+ }
+ else {
+ x$variables <- x$variables[, j, drop = FALSE]
+ }
+ x
+}
+
+
+svrepdesign.character<-function (variables=NULL,repweights=NULL, weights=NULL,
+ data=NULL,type=c("BRR","Fay","JK1", "JKn","bootstrap","other"),
+ combined.weights=TRUE, rho=NULL, bootstrap.average=NULL,
+ scale=NULL,rscales=NULL,fpc=NULL, fpctype=c("fraction","correction"),
+ mse=getOption("survey.replicates.mse"),dbtype="SQLite", dbname,
+ ...)
+{
+
+ if (dbtype == "ODBC"){
+ if (dbname=="")
+ dbconn<-RODBC::odbcDriverConnect(dbname,...)
+ else
+ dbconn<-RODBC::odbcConnect(dbname,...)
+ } else {
+ db<-DBI::dbDriver(dbtype)
+ dbconn<- DBI::dbConnect(db, dbname,...)
+ }
+ if (is.character(repweights)){
+ allvars<-names(DBI::dbGetQuery(dbconn, paste("select * from",data,"limit 1")))
+ design.vars<-c(all.vars(weights),grep(repweights,allvars,value=TRUE))
+ } else {
+ design.vars<-c(all.vars(weights),all.vars(repweights))
+ }
+
+
+ design.query<-paste("select", paste(design.vars,collapse=","), "from", data)
+ if (dbtype=="ODBC")
+ design.data<-RODBC::sqlQuery(dbconn, design.query)
+ else
+ design.data<-DBI::dbGetQuery(dbconn, design.query)
+
+ rval<-svrepdesign(variables=variables,repweights=repweights, weights=weights, type=type,
+ data=design.data,
+ combined.weights=combined.weights, rho=rho, bootstrap.average=NULL,
+ scale=scale,rscales=rscales,fpc=fpc, fpctype=c("fraction","correction"), mse=mse)
+
+ rval$db<-list(dbname=dbname, tablename=data, connection=dbconn, dbtype=dbtype)
+ rval$variables<-NULL
+ rval$call<-sys.call(-1)
+ if (dbtype=="ODBC")
+ class(rval)<-c("ODBCrepdesign","ODBCsvydesign",class(rval))
+ else
+ class(rval)<-c("DBIrepdesign","DBIsvydesign",class(rval))
+ rval
+}
+
+print.DBIrepdesign<-function(x,...){
+ cat("DB-backed replicate weight design\n")
+ print.svyrep.design(x,...)
+ if (!checkConnection(x$db$connection, error=FALSE))
+ cat("<DBI Connection closed>\n")
+ invisible(x)
+}
+
+print.ODBCrepdesign<-function(x,...){
+ cat("ODBC-backed replicate weight design\n")
+ print.svyrep.design(x,...)
+ if (!checkConnection(x$db$connection, error=FALSE))
+ cat("<ODBC Connection closed>\n")
+ invisible(x)
+}
+
+summary.DBIrepdesign<-function(object,...){
+ summary.svyrep.design(object,...)
+}
+
+summary.ODBCrepdesign<-function(object,...){
+ summary.svyrep.design(object,...)
+}
+
+
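R/DBI.R above implements database-backed designs: svydesign.character reads only the design variables when the object is created, and each analysis method (svymean.DBIsvydesign and friends) fetches the analysis variables from the table on demand. A minimal sketch against the bundled SQLite file inst/api.db; the table name apiclus1 and the variable names follow the package's documented examples and are assumptions here:

    dbclus1 <- svydesign(id = ~dnum, weights = ~pw, fpc = ~fpc,
                         data = "apiclus1", dbtype = "SQLite",
                         dbname = system.file("api.db", package = "survey"))
    svymean(~api00, dbclus1)   # dispatches to svymean.DBIsvydesign
    close(dbclus1)             # close.DBIsvydesign releases the connection
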
diff --git a/R/anova.svyglm.R b/R/anova.svyglm.R
new file mode 100644
index 0000000..80e3978
--- /dev/null
+++ b/R/anova.svyglm.R
@@ -0,0 +1,162 @@
+
+oneanova.svyglm<-function(object,test,method){
+ tt<-terms(object)
+ tlbls<-attr(tt,"term.labels")
+ nt<-length(tlbls)
+ if (nt<2) return(NULL)
+ seqtests<-vector("list",nt)
+ if(test=="F") ddf<-NULL else ddf<-Inf
+ thismodel<-object
+ if (!("formula" %in% names(thismodel$call)))
+ names(thismodel$call)[[2]] <- "formula"
+ for(i in nt:1){
+ thisterm<-tlbls[i]
+ seqtests[[i]]<-regTermTest(thismodel,tlbls[i],method=method,df=ddf)
+ thisformula<-make.formula(thisterm)[[2]]
+ thismodel <- eval(bquote(update(thismodel, . ~ . - (.(thisformula)))))
+ }
+ class(seqtests)<-"seqanova.svyglm"
+ attr(seqtests,"method")<-method
+ attr(seqtests,"test")<-test
+ seqtests
+ }
+
+print.seqanova.svyglm<-function(x,...){
+ isWald<-attr(x,"method")=="Wald"
+ isF<-attr(x,"test")=="F"
+
+ cat("Anova table: ")
+ if (isWald) cat("(Wald tests)\n") else cat(" (Rao-Scott LRT)\n")
+ print(x[[1]]$mcall)
+
+ terms<-sapply(x,"[[","test.terms")
+ stats<-if(isF && isWald) sapply(x,"[[","Ftest") else sapply(x,"[[","chisq")
+ if(!isWald) stats<-cbind(stats,DEff=sapply(x,function(xi) mean(xi$lambda)))
+ df<-sapply(x,"[[","df")
+ p<-sapply(x,"[[","p")
+ if (!isF){
+ rval<-cbind(stats,df,p)
+ } else {
+ ddf<-sapply(x,"[[","ddf")
+ rval<-cbind(stats,df,ddf,p)
+ }
+ rownames(rval)<-terms
+ printCoefmat(rval,tst.ind=1,zap.ind=2:3,has.Pvalue=TRUE)
+ invisible(x)
+}
+
+
+SD<-function(x) if (NCOL(x)>1) apply(x,2,sd) else sd(x)
+
+anova.svyglm<-function(object, object2=NULL,test=c("F","Chisq"),method=c("LRT","Wald"),tolerance=1e-5,...,force=FALSE){
+ test<-match.arg(test)
+ method<-match.arg(method)
+ if(is.null(object2)) ## sequential tests
+ return(oneanova.svyglm(object,test,method))
+
+ t1<-attr(terms(object),"term.labels")
+ t2<-attr(terms(object2),"term.labels")
+ if ((all(t1 %in% t2) || all(t2 %in% t1)) && !force){
+ ## symbolically nested, call regTermTest
+ biggerobject<-if(all(t1 %in% t2)) object2 else object
+ termdiff<-make.formula(if(all(t1 %in% t2)) setdiff(t2,t1) else setdiff(t1,t2))
+ if(test=="F") ddf<-NULL else ddf<-Inf
+ return(regTermTest(biggerobject,termdiff,df=ddf,method=method))
+ }
+
+ ## not symbolically nested, need to project explicitly
+ X<-model.matrix(object)
+ Z<-model.matrix(object2)
+ if (nrow(X)!=nrow(Z)) stop("models have different numbers of observations")
+ if (ncol(X)>ncol(Z)) {
+ tmp<-X
+ X<-Z
+ Z<-tmp
+ bigger<-1
+ } else bigger<-2
+ if (any(sapply(suppressWarnings(summary(lm(X~Z))), "[[","sigma")/(tolerance+SD(X))>tolerance)) stop("models not nested")
+
+ XX<-matrix(nrow=nrow(Z),ncol=ncol(Z))
+ xform<-lm(Z[,1]~X+0)
+ XX[,1]<-resid(xform)
+ for(i in 2:ncol(Z)){
+ XX[,i]<-resid(xform<-lm(Z[,i]~X+Z[,1:(i-1)]+0))
+ }
+ colkeep<-colMeans(abs(XX))/(tolerance+colMeans(abs(Z))) > tolerance
+ XX<-XX[,colkeep,drop=FALSE]
+ index<-ncol(X)+(1:ncol(XX))
+
+ ## and now need to refit the model
+ ## ugly, but svyglm demands that all variables are in the design argument.
+ ## We do know the fitted values at convergence, so one iteration suffices.
+ mu<-if(bigger==1) fitted(object) else fitted(object2)
+ eta<-if(bigger==1) object$linear.predictors else object2$linear.predictors
+ offset<-if(bigger==1) object$offset else object2$offset
+ if (is.null(offset)) offset<-0
+ pweights<-weights(object$survey.design,"sampling")
+ y<-object$y
+ if (length(pweights)!=length(y)){
+ pweights<-pweights[pweights>0]
+ if (length(pweights)!=length(y)) stop("number of observations does not match design")
+ }
+ pweights<-pweights/mean(pweights)
+ ywork<-eta-offset+(y-mu)/object$family$mu.eta(eta)
+ wwork<-((pweights * object$family$mu.eta(eta)^2)/object$family$variance(mu))
+ wlm<-lm.wfit(cbind(X,XX),ywork,wwork)
+ p1<-1:wlm$rank
+ Ainv<-chol2inv(wlm$qr$qr[p1,p1,drop=FALSE])
+
+ estfun<-cbind(X,XX)*wwork*((y-mu)/object$family$mu.eta(eta))
+ design<-object$survey.design
+ if (inherits(design, "survey.design2"))
+ V<-svyrecvar(estfun %*% Ainv, design$cluster, design$strata,
+ design$fpc, postStrata = design$postStrata)
+ else if (inherits(design, "twophase"))
+ V<-twophasevar(estfun %*% Ainv, design)
+ else if (inherits(design, "twophase2"))
+ V<-twophase2var(estfun %*% Ainv, design)
+ else if (inherits(design, "pps"))
+ V<-ppsvar(estfun %*% Ainv, design)
+
+ V<-V[index,index]
+ df<-min(object$df.residual, object2$df.residual)
+
+ if(method=="LRT"){
+ V0<-Ainv[index,index]
+ chisq <- if(bigger==1) deviance(object2) - deviance(object) else deviance(object)-deviance(object2)
+ misspec <- eigen(solve(V0) %*% V, only.values = TRUE)$values
+
+
+ if (test=="Chisq")
+ p <- pchisqsum(chisq, rep(1, length(misspec)), misspec,
+ method = "sad", lower.tail = FALSE)
+ else p <- pFsum(chisq, rep(1, length(misspec)), misspec,
+ ddf = df, method = "sad", lower.tail = FALSE)
+
+ rval <- list(call = sys.call(), chisq = chisq,
+ df = length(index), p = p,
+ lambda = misspec, ddf = df, mcall=if(bigger==1) object$call else object2$call,
+ test.terms=if(bigger==1) c(setdiff(t1,t2),"-",setdiff(t2,t1)) else c(setdiff(t2,t1),"-",setdiff(t1,t2))
+ )
+
+ class(rval)<-"regTermTestLRT"
+ } else {
+ ## method=Wald
+ beta<- wlm$coefficients[index]
+ chisq<-crossprod(beta,solve(V,beta))
+ if (test=="Chisq"){
+ p<-pchisq(chisq,df=length(index),lower.tail=FALSE)
+ } else {
+ p<-pf(chisq/length(index),df1=length(index),df2=df,lower.tail=FALSE)
+ }
+ rval <- list(call = sys.call(), Ftest = chisq/length(index),
+ df = length(index), p = p,
+ ddf = df, mcall=if(bigger==1) object$call else object2$call,
+ test.terms=if(bigger==1) c(setdiff(t1,t2),"-",setdiff(t2,t1)) else c(setdiff(t2,t1),"-",setdiff(t1,t2))
+ )
+ class(rval)<-"regTermTest"
+
+ }
+
+ rval
+}
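anova.svyglm above distinguishes three cases: a single model yields sequential tests of each term via regTermTest; two symbolically nested models reduce to a single regTermTest call; otherwise the extra columns of the larger model are projected out explicitly and the model is refit for one weighted least-squares step. A usage sketch, with model formulas assumed from the api data rather than taken from this commit:

    m0 <- svyglm(api00 ~ ell + mobility, design = dclus1)
    m1 <- svyglm(api00 ~ ell + mobility + meals, design = dclus1)
    anova(m1)                      # sequential Rao-Scott tests, one term at a time
    anova(m0, m1)                  # compare the nested pair (working LRT, F reference)
    anova(m0, m1, method = "Wald") # Wald test of the extra term instead
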
diff --git a/R/bootstrap.R b/R/bootstrap.R
new file mode 100644
index 0000000..22e42bb
--- /dev/null
+++ b/R/bootstrap.R
@@ -0,0 +1,103 @@
+
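+## subbootstrap for one stratum: resample n-1 of the n PSUs with
+## replacement and rescale the counts by n/(n-1)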
+subbootstratum<-function(psu,replicates){
+ upsu<-sample(unique(psu))
+ n<-length(upsu)
+ replicate(replicates,
+ table(factor(sample(upsu, length(upsu)-1,replace=TRUE),
+ levels=unique(psu))))*n/(n-1)
+}
+
+bootstratum<-function(psu, popsize, replicates){
+ upsu<-sample(unique(psu))
+ if (is.null(popsize)){
+ replicate(replicates,
+ table(factor(sample(upsu,length(upsu),replace=TRUE),
+ levels=unique(psu))))
+ } else {
+ replicate(replicates,
+ table(factor(sample(rep(upsu,length=popsize), length(upsu)),
+ levels=unique(psu))))
+ }
+}
+
+bootweights<-function(strata, psu, replicates=50, fpc=NULL,
+ fpctype=c("population","fraction","correction"),
+ compress=TRUE){
+
+ fpctype<-match.arg(fpctype)
+
+ index<-match(psu,psu[!duplicated(psu)])
+ upsu<-unique(psu)
+
+ strata<-as.character(strata)
+ weights<-matrix(nrow=length(upsu),ncol=replicates)
+ ustrata<-strata[!duplicated(psu)]
+ ufpc<-fpc[!duplicated(psu)]
+
+ for(s in unique(ustrata)){
+ this.stratum<-ustrata==s
+ npsu<-length(unique(upsu[this.stratum]))
+
+ if (is.null(fpc))
+ weights[this.stratum,]<-bootstratum(upsu[this.stratum],NULL,replicates)
+ else {
+ this.fpc<-ufpc[this.stratum]
+ if (length(unique(this.fpc))>1)
+ stop("More than one fpc in stratum ",s)
+ this.fpc<-this.fpc[1]
+ if (fpctype=="population" && this.fpc<npsu)
+ stop("Population size smaller than sample size in stratum ",s)
+ this.fpc <-switch(fpctype,
+ population=this.fpc,
+ fraction=npsu/this.fpc,
+ correction=1-npsu/this.fpc)
+ if (this.fpc> 100*npsu)
+ warning("Sampling fraction <1% in stratum ",s," treated as zero.")
+ weights[this.stratum,]<-bootstratum(upsu[this.stratum],
+ popsize=this.fpc,replicates=replicates)
+ }
+ }
+
+ ## harmonic mean of stratum sizes
+ psu.per.strata<-1/mean(1/table(ustrata))
+
+ if (compress){
+ rw<-list(weights=weights,index=index)
+ class(rw)<-"repweights_compressed"
+ } else {
+ rw<-weights[index,]
+ }
+
+ list(repweights=rw, scale=psu.per.strata/((psu.per.strata-1)*(replicates-1)),
+ rscales=rep(1,replicates))
+}
+
+subbootweights<-function(strata, psu, replicates=50,
+ compress=TRUE){
+
+
+ index<-match(psu,psu[!duplicated(psu)])
+ upsu<-unique(psu)
+
+ strata<-as.character(strata)
+ weights<-matrix(nrow=length(upsu),ncol=replicates)
+ ustrata<-strata[!duplicated(psu)]
+
+ for(s in unique(ustrata)){
+ this.stratum<-ustrata==s
+ npsu<-length(unique(upsu[this.stratum]))
+
+ weights[this.stratum,]<-subbootstratum(upsu[this.stratum],replicates)
+
+ }
+
+ if (compress){
+ rw<-list(weights=weights,index=index)
+ class(rw)<-"repweights_compressed"
+ } else {
+ rw<-weights[index,]
+ }
+
+ list(repweights=rw, scale=1/(replicates-1),
+ rscales=rep(1,replicates))
+}
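bootweights above resamples n of the n PSUs within each stratum (drawing from an expanded pseudo-population when an fpc is supplied), while subbootweights resamples n-1 PSUs with the n/(n-1) rescaling. Both are normally reached through as.svrepdesign rather than called directly; a sketch under that assumption, again reusing dclus1:

    dclus1b <- as.svrepdesign(dclus1, type = "bootstrap", replicates = 100)
    svymean(~api00, dclus1b)   # SE computed from the 100 replicate weights
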
diff --git a/R/chisqsum.R b/R/chisqsum.R
new file mode 100644
index 0000000..bea7ca5
--- /dev/null
+++ b/R/chisqsum.R
@@ -0,0 +1,110 @@
+
+pchisqsum<-function(x,df,a,lower.tail=TRUE,
+ method=c("satterthwaite","integration","saddlepoint")){
+
+ satterthwaite<-function(a,df){
+ if(any(df>1)){
+ a<-rep(a,df)
+ }
+ tr<-mean(a)
+ tr2<-mean(a^2)/(tr^2)
+
+ list(scale=tr*tr2, df=length(a)/tr2)
+ }
+
+ ## chisqphi<-function(t, df=1,a=1){
+ ## (1-(0+2i)*a*t)^(-df/2)
+ ## }
+
+ ## make.integrand<-function(x,DF,A){
+ ## m<-length(DF)
+
+ ## function(t){
+ ## n<-length(t)
+ ## tmp<-matrix(chisqphi(rep(t,each=m),rep(DF,n),rep(A,n) ),ncol=n)
+ ## phi<-apply(tmp,2,prod)
+ ## rval<-Im(phi*exp(-(0+1i)*t*x)/(pi*t))
+ ## rval[t==0]<-x/(pi)
+ ## rval
+ ## }
+
+ ## }
+
+
+ method<-match.arg(method)
+ sat<-satterthwaite(a,df)
+ guess<-pchisq(x/sat$scale,sat$df,lower.tail=lower.tail)
+
+ if (method=="satterthwaite")
+ return(guess)
+
+ if (method=="integration" && !(requireNamespace("CompQuadForm",quietly=TRUE))){
+ warning("Package 'CompQuadForm' not found, using saddlepoint approximation")
+ method<-"saddlepoint"
+ }
+
+
+ abstol<-guess/1000
+ abstol<-pmax(1e-9, abstol)
+ reltol<-rep(1/1000,length(abstol))
+
+ if (method=="integration"){
+ if (any(a<=0)){
+ for(i in seq(length=length(x))){
+ f<-CompQuadForm::davies(x[i],a,df,acc=1e-7)
+ if (f$ifault>0) warning("Probable loss of accuracy ")
+ guess[i]<-f$Qq
+ }
+ if(any(guess<1e-6)) warning("Probable loss of accuracy ")
+ } else{
+ for(i in seq(length=length(x))){
+ ## version 1.4.2 of CompQuadForm changed the *name* of the result. Grr.
+ temp<-CompQuadForm::farebrother(x[i],a,df)
+ guess[i]<-if ("Qq" %in% names(temp)) temp$Qq else temp$res
+ }
+ if(any(guess<1e-9)) warning("Probable loss of accuracy ")
+ }
+ if (lower.tail)
+ guess<-1-guess
+
+ return(guess)
+  } else if (method=="saddlepoint"){
+    ## sapply() vectorises over x here, so no outer loop is needed
+    lambda<-rep(a,df)
+    sad<-sapply(x,saddle,lambda=lambda)
+    if (lower.tail) sad<-1-sad
+    guess<-ifelse(is.na(sad),guess,sad)
+    return(guess)
+  }
+}
+
+saddle<-function(x,lambda){
+ d<-max(lambda)
+ lambda<-lambda/d
+ x<-x/d
+ k0<-function(zeta) -sum(log(1-2*zeta*lambda))/2
+ kprime0<-function(zeta) sapply(zeta, function(zz) sum(lambda/(1-2*zz*lambda)))
+ kpprime0<-function(zeta) 2*sum(lambda^2/(1-2*zeta*lambda)^2)
+ n<-length(lambda)
+ if (any(lambda < 0)) {
+ lmin <- max(1/(2 * lambda[lambda < 0])) * 0.99999
+ } else if (x>sum(lambda)){
+ lmin <- -0.01
+ } else {
+ lmin<- -length(lambda)/(2*x)
+ }
+ lmax<-min(1/(2*lambda[lambda>0]))*0.99999
+
+ hatzeta <- uniroot(function(zeta) kprime0(zeta) - x,
+ lower = lmin, upper = lmax, tol = 1e-08)$root
+
+ w<-sign(hatzeta)*sqrt(2*(hatzeta*x-k0(hatzeta)))
+ v<-hatzeta*sqrt(kpprime0(hatzeta))
+ if (abs(hatzeta)<1e-4)
+ NA
+ else
+ pnorm(w+log(v/w)/w, lower.tail=FALSE)
+}
+
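+## Illustration only, not part of the package: upper tail probability of
+## 3.2*X1 + 1.1*X2 with X1, X2 independent chi-squared(1) variables.
+if(FALSE){
+  pchisqsum(10, df=c(1,1), a=c(3.2,1.1), lower.tail=FALSE,
+            method="saddlepoint")
+  ## compare with the cruder Satterthwaite approximation
+  pchisqsum(10, df=c(1,1), a=c(3.2,1.1), lower.tail=FALSE,
+            method="satterthwaite")
+}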
diff --git a/R/compressweights.R b/R/compressweights.R
new file mode 100755
index 0000000..8771fd7
--- /dev/null
+++ b/R/compressweights.R
@@ -0,0 +1,56 @@
+
+"dim.repweights_compressed"<-function(x){
+ c(length(x$index),ncol(x$weights))
+}
+
+"dimnames.repweights_compressed"<-function(x){
+ list(names(x$index), colnames(x$weights))
+}
+
+"[.repweights_compressed"<-function(x,i,...,drop=FALSE){
+ if (!missing(i)){
+ x$index<-x$index[i]
+ if(!missing(..1))
+ x$weights<-x$weights[,..1,drop=FALSE]
+ } else{
+ ## this is faster than just subscripting x$weights (!)
+ x<-list(index=x$index,
+ weights=x$weights[,...,drop=FALSE])
+ class(x)<-c("repweights_compressed","repweights")
+ }
+ x
+}
+
+"as.matrix.repweights_compressed"<-function(x,...){
+ x$weights[x$index,,drop=FALSE]
+}
+
+"as.vector.repweights_compressed"<-function(x,...){
+ as.vector(x$weights[x$index,])
+}
+
+"as.matrix.repweights"<-function(x,...){
+ x
+}
+
+compressWeights<-function(rw,...){
+ UseMethod("compressWeights")
+}
+
+"compressWeights.repweights_compressed"<-function(rw,...){
+ compressWeights(as.matrix(rw))
+}
+
+compressWeights.default<-function(rw,...){
+ mat<-as.matrix(rw)
+ tmp<-apply(mat,1,function(x) paste(x,collapse="\r"))
+ unq<-!duplicated(mat)
+ rval<-list(weights=mat[unq,],index=match(tmp,tmp[unq]))
+ class(rval)<-c("repweights_compressed","repweights")
+ rval
+}
+
+compressWeights.svyrep.design<-function(rw,...){
+ rw$repweights<-compressWeights(rw$repweights,...)
+ rw
+}
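+
+## Behaviour sketch (illustration only, not part of the package):
+## compression keeps one copy of each distinct row of the replicate-weight
+## matrix plus a row index, and as.matrix() reverses it losslessly.
+if(FALSE){
+  m <- matrix(rep(c(1,0,2,1), each=3), nrow=6, ncol=2) ## duplicated rows
+  cw <- compressWeights(m)
+  all.equal(as.matrix(cw), m, check.attributes=FALSE) ## TRUE
+}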
diff --git a/R/confint.R b/R/confint.R
new file mode 100644
index 0000000..d0cf94a
--- /dev/null
+++ b/R/confint.R
@@ -0,0 +1,32 @@
+format.perc<-function (probs, digits) {
+ paste(format(100 * probs, trim = TRUE,
+ scientific = FALSE, digits = digits), "%")
+}
+
+confint.svystat<-function (object, parm, level = 0.95, df=Inf,...) {
+ tconfint(object, parm, level,df)
+}
+
+confint.svrepstat<-confint.svystat
+confint.svyby<-confint.svystat
+confint.svyratio<-confint.svystat
+
+
+tconfint<-function (object, parm, level = 0.95, df=Inf)
+{
+ cf <- coef(object)
+ pnames <- names(cf)
+ if (missing(parm))
+ parm <- pnames
+ else if (is.numeric(parm))
+ parm <- pnames[parm]
+ a <- (1 - level)/2
+ a <- c(a, 1 - a)
+ pct <- format.perc(a, 3)
+ fac <- qt(a, df=df)
+ ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm,
+ pct))
+    ses <- unlist(SE(object))[match(parm, pnames)]
+ ci[] <- cf[parm] + ses %o% fac
+ ci
+}
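+
+## Usage sketch (illustration only, not part of the package; the 'api'
+## example data ship with the package):
+if(FALSE){
+  data(api)
+  dclus1 <- svydesign(id=~dnum, weights=~pw, fpc=~fpc, data=apiclus1)
+  confint(svymean(~api00, dclus1))                  ## Wald-type, df=Inf
+  confint(svymean(~api00, dclus1), df=degf(dclus1)) ## t-based interval
+}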
diff --git a/R/count.R b/R/count.R
new file mode 100644
index 0000000..8b6e5a1
--- /dev/null
+++ b/R/count.R
@@ -0,0 +1,34 @@
+unwtd.count<-function(x, design,...){
+
+ if (inherits(x, "formula")) {
+ mf <- model.frame(x, model.frame(design), na.action = na.pass)
+ xx <- lapply(attr(terms(x), "variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0 + .(tt))), mf)
+ )
+ cols <- sapply(xx, NCOL)
+ x <- matrix(nrow = NROW(xx[[1]]), ncol = sum(cols))
+ scols <- c(0, cumsum(cols))
+ for (i in 1:length(xx)) {
+ x[, scols[i] + 1:cols[i]] <- xx[[i]]
+ }
+ colnames(x) <- do.call("c", lapply(xx, colnames))
+ }
+ else if (typeof(x) %in% c("expression", "symbol"))
+ x <- eval(x, model.frame(design))
+ x <- as.matrix(x)
+ out<- weights(design,"sampling")==0
+ nas <- rowSums(is.na(x))
+
+ x <- x[(nas+out) == 0, , drop = FALSE]
+
+ rval<-NROW(x)
+ names(rval)<-"counts"
+ attr(rval,"var")<-matrix(0,1,1)
+ attr(rval,"statistic")<-"counts"
+ if (inherits(design,"svyrep.design"))
+ class(rval)<-"svrepstat"
+ else
+ class(rval)<-"svystat"
+ rval
+
+}
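+
+## Usage sketch (illustration only, not part of the package): unwtd.count()
+## reports how many non-missing observations an estimate is based on, and is
+## convenient as the FUN argument to svyby().
+if(FALSE){
+  data(api)
+  dclus1 <- svydesign(id=~dnum, weights=~pw, fpc=~fpc, data=apiclus1)
+  svyby(~api00, ~stype, dclus1, unwtd.count)
+}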
diff --git a/R/dbiupdate.R b/R/dbiupdate.R
new file mode 100644
index 0000000..1cc3869
--- /dev/null
+++ b/R/dbiupdate.R
@@ -0,0 +1,118 @@
+
+##
+## stored variable updates.
+##
+
+updatesInfilter<-function(varlist, updates){
+ if (is.null(updates)) return(list(varlist=varlist))
+ n<-length(updates)
+ v<-vector("list",n)
+ for(i in n:1){
+ if (any(idx<-(varlist %in% names(updates[[i]])))){
+ v[[i]]<-varlist[idx]
+ ups<-match(v[[i]], names(updates[[i]]))
+ varlist<-unique(c(varlist[!idx], do.call(c, lapply(updates[[i]][ups], "[[", "inputs"))))
+ }
+ }
+ list(varlist=varlist, history=v)
+}
+
+updatesOutfilter<-function(df, varlist,history, updates){
+ if (is.null(updates)) return(df)
+ if (all(sapply(history,length)==0)) return(df)
+ n<-length(updates)
+ for(i in 1:n){
+ if (mi<-length(history[[i]])){
+ outputs<-vector("list", mi)
+ for(j in 1:mi){
+ idx.j<-match(history[[i]][j],names(updates[[i]]))
+ outputs[[j]]<-eval(updates[[i]][[idx.j]]$expression, df)
+ }
+ names(outputs)<-history[[i]]
+ if (any(mod<-names(df) %in% names(outputs))){
+ df<-df[,!mod,drop=FALSE]
+ }
+ df<-cbind(df,outputs)
+ }
+ }
+ df[, names(df) %in% varlist,drop=FALSE]
+}
+
+checkConnection<-function(dbconnection, error=TRUE){
+ if (is(dbconnection,"DBIConnection")) {
+ if (!DBI::dbIsValid(dbconnection))
+ if (error)
+ stop("Database connection is closed")
+ else
+ return(FALSE)
+ } else{## RODBC
+ ## we aren't allowed to check odbc connections in a CRAN package
+ }
+ invisible(TRUE)
+}
+
+getvars<-function (formula, dbconnection, tables, db.only = TRUE, updates=NULL, subset=NULL)
+{
+
+ checkConnection(dbconnection)
+
+ if (is.null(formula))
+ return(NULL)
+
+ if (inherits(formula, "formula")) {
+ var0<- all.vars(formula)
+ } else if (is.character(formula)){
+ var0<-formula
+ } else {
+ return(formula)
+ }
+
+ infilter<-updatesInfilter(var0, updates)
+ if (db.only) {
+ in.db <- infilter$varlist
+ }
+ else {
+ query <- sub("@tab@", tables, "select * from @tab@ limit 1")
+ if (is(dbconnection,"DBIConnection"))
+ oneline <- DBI::dbGetQuery(dbconnection, query)
+ else ##ODBC
+ oneline <- RODBC::sqlQuery(dbconnection, query)
+ in.db <- infilter$varlist[infilter$varlist %in% names(oneline)]
+ }
+ query <- paste("select", paste(in.db, collapse = ", "), "from",
+ tables)
+
+ if (is(dbconnection, "DBIConnection"))
+ df <- DBI::dbGetQuery(dbconnection, query)
+ else ##ODBC
+ df<-RODBC::sqlQuery(dbconnection, query)
+
+ if (!is.null(subset)) df<-df[subset,,drop=FALSE]
+
+ df<-updatesOutfilter(df, var0, infilter$history, updates)
+
+ is.string <- sapply(df, is.character)
+ if (any(is.string)) {
+ for (i in which(is.string)) df[[i]] <- as.factor(df[[i]])
+ }
+ df
+ }
+
+
+update.DBIsvydesign<-function(object, ...){
+ dots <- substitute(list(...))[-1]
+ newnames <- names(dots)
+
+ updates<-lapply(dots, function(dot){
+ list(inputs=all.vars(dot),expression=dot)
+ })
+
+ if (is.null(object$updates))
+ object$updates<-list(updates)
+ else
+ object$updates<-c(object$updates, list(updates))
+ object
+}
+
+
+update.ODBCsvydesign<-update.DBIsvydesign
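+
+## Behaviour sketch (illustration only, not part of the package): for
+## database-backed designs, update() just records the expression and its
+## input variables; getvars() replays the recorded updates whenever
+## variables are loaded, so the database itself is never modified.
+## 'dbdes' and the column names below are hypothetical.
+if(FALSE){
+  dbdes <- update(dbdes, apidiff = api00 - api99)
+  svymean(~apidiff, dbdes) ## apidiff computed on the fly from api00, api99
+}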
diff --git a/R/ftable.svystat.R b/R/ftable.svystat.R
new file mode 100755
index 0000000..a9d93c7
--- /dev/null
+++ b/R/ftable.svystat.R
@@ -0,0 +1,69 @@
+
+
+ftable.svystat<-function(x, rownames=NULL, ...){
+
+ m<-cbind(coef(x),SE(x))
+ if (is.null(rownames))
+ return(as.table(m))
+
+ statname<-if (is.list(x)) attr(x[[1]],"statistic") else attr(x,"statistic")
+
+ deff<-attr(x,"deff")
+ has.deff<-!is.null(deff)
+ if (has.deff)
+ m<-cbind(m,diag(deff))
+
+ rowdim<-sapply(rownames,length)
+
+ if (has.deff){
+ mm<-array(m,dim=c(rowdim,NCOL(m)),
+ dimnames=c(as.list(rownames),
+ list(c(statname,"SE","Deff"))))
+
+ ftable(mm,row.vars=length(rowdim)+0:1)
+ } else {
+ mm<-array(m,dim=c(rowdim,NCOL(m)),
+ dimnames=c(as.list(rownames),
+ list(c(statname,"SE"))))
+
+ ftable(mm,row.vars=length(rowdim)+0:1)
+ }
+
+}
+
+ftable.svrepstat<-ftable.svystat
+
+
+ftable.svyby <- function (x, ...)
+{
+ info <- attr(x, "svyby")
+ margins <- info$margins
+ dimnames <- lapply(x[, margins, drop = FALSE], levels)
+ dims <- sapply(dimnames, length)
+ dims <- c(dims, variable = info$nstats)
+ senames<-c(se="SE",cv="cv",cvpct="cv%",var="Var")[info$vartype]
+ if (info$vars || info$deffs) {
+ dims <- c(dims, 1 + info$vars + info$deffs)
+ dimnames <- c(dimnames,
+ list(sub("^statistic\\.(.*)$", "\\1", info$variables)),
+ list(c(info$statistic,
+ if (info$vars) senames,
+ if (info$deffs) "DEff")))
+ }
+ else if (info$nstats == 1) {
+ dimnames <- c(dimnames, list(info$statistic))
+ }
+ else {
+ dimnames <- c(dimnames, list(info$variables))
+ }
+ ## fix by Sergio Calva for ordering bug.
+ x <- x[do.call("order",x[,rev(margins),drop = FALSE]),]
+ rval <- array(as.matrix(x[, -margins, drop = FALSE]), dim = dims,
+ dimnames = dimnames)
+ ftable(rval, row.vars = c(1, length(dim(rval))))
+}
+
+if(FALSE){
+odfTable.svystat <- function(x,...) odfTable(as.data.frame(x),...)
+odfTable.table <- odfTable.matrix
+}
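+
+## Usage sketch (illustration only, not part of the package), in the style
+## of the ftable.svystat help page:
+if(FALSE){
+  data(api)
+  dclus1 <- svydesign(id=~dnum, weights=~pw, fpc=~fpc, data=apiclus1)
+  a <- svymean(~interaction(stype, comp.imp), dclus1, deff=TRUE)
+  ftable(a, rownames=list(stype=c("E","H","M"), comp.imp=c("No","Yes")))
+}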
diff --git a/R/grake.R b/R/grake.R
new file mode 100644
index 0000000..3a6cd9e
--- /dev/null
+++ b/R/grake.R
@@ -0,0 +1,429 @@
+
+make.calfun<-function(Fm1,dF, name){
+ if (!identical(names(formals(Fm1)), c("u","bounds")))
+ stop("wrong argument names for Fm1")
+ if(!identical(names(formals(dF)), c("u","bounds")))
+ stop("wrong argument names for dF")
+ rval<-list(Fm1=Fm1, dF=dF, name=name)
+ class(rval)<-"calfun"
+ rval
+}
+
+print.calfun<-function(x,...) cat("calibration metric: ",x$name,"\n")
+
+calibrate<-function(design, ...) UseMethod("calibrate")
+
+calibrate.survey.design2<-function(design, formula, population,
+ aggregate.stage=NULL, stage=0, variance=NULL,
+ bounds=c(-Inf,Inf), calfun=c("linear","raking","logit"),
+ maxit=50, epsilon=1e-7, verbose=FALSE, force=FALSE, trim=NULL,
+ ...){
+
+ if(is.list(formula) && is.list(population)){
+ ## inputs as marginal totals, as in rake()
+ population<-margins2totals(formula,population)
+ formula<-as.formula(paste("~",paste(sapply(formula,function(f) paste(all.vars(f),collapse="*")),collapse="+")))
+ if (verbose){
+ print(formula)
+ print(population)
+ }
+ }
+
+
+ if (is.character(calfun)) calfun<-match.arg(calfun)
+  if (is.character(calfun) && calfun=="linear" && all(bounds==c(-Inf,Inf))){
+ ## old code is better for ill-conditioned linear calibration
+ rval<-regcalibrate(design,formula,population,
+ aggregate.stage=aggregate.stage, stage=stage,
+ lambda=variance,...)
+ rval$call<-sys.call(-1)
+ return(rval)
+ }
+
+ if(is.character(calfun))
+ calfun<-switch(calfun,linear=cal.linear, raking=cal.raking, logit=cal.logit)
+ else
+ if(!inherits(calfun,"calfun"))
+ stop("'calfun' must be a string or of class 'calfun'.")
+
+ if (length(epsilon)!=1 && length(epsilon)!=length(population))
+ stop("'epsilon' must be a scalar or of the same length as 'population'")
+
+ if (!is.null(aggregate.stage)){
+ aggindex<-design$cluster[[aggregate.stage]]
+ }
+
+ expit<-function(x) 1-1/(1+exp(x))
+
+ ## calibration to population totals
+ mm<-model.matrix(formula, model.frame(formula, model.frame(design)))
+ ww<-weights(design)
+
+ if (!is.null(aggregate.stage)){
+ mm<-apply(mm,2,function(mx) ave(mx,aggindex))
+ ww<-ave(ww,aggindex)
+ }
+ whalf<-sqrt(ww)
+ sample.total<-colSums(mm*ww)
+
+ if(any(sample.total==0)){
+    ## drop columns where both the sample and population totals are zero
+ zz<-(population==0) & (apply(mm,2,function(x) all(x==0)))
+ mm<-mm[,!zz]
+ population<-population[!zz]
+ sample.total<-sample.total[!zz]
+ if (length(epsilon)>1) epsilon <- epsilon[!zz]
+ }
+
+
+ if (length(sample.total)!=length(population)){
+ print(sample.total)
+ print(population)
+ stop("Population and sample totals are not the same length.")
+ }
+ if(!is.null(names(population))){
+ if (!all(names(sample.total) %in% names(population)))
+ warning("Sampling and population totals have different names.")
+ else if (!all(names(sample.total) == names(population))){
+ warning("Sample and population totals reordered to make names agree: check results.")
+ population <- population[match(names(sample.total), names(population))]
+ }
+ }
+
+ tqr<-qr(mm*whalf)
+ if (!all(abs(qr.resid(tqr,whalf))<1e-10))
+ warning("G-calibration models must have an intercept")
+
+ g<-grake(mm,ww,calfun, bounds=bounds,population=population,
+ verbose=verbose,epsilon=epsilon,maxit=maxit)
+
+ if(!is.null(trim)) {
+ gnew<-pmax(trim[1], pmin(g, trim[2]))
+ outside<-g<trim[1] | g>trim[2]
+ if (any(outside)){
+ trimmings<-(g-gnew)*ww
+ gnew[!outside]<-gnew[!outside]+sum(trimmings)/sum(ww[!outside])
+ g<-gnew
+ attr(g,"failed")<-NULL
+ message(paste(sum(outside),"weights were trimmed"))
+ }
+ }
+ if (!is.null(attr(g,"failed"))){
+ if (!force) stop("Calibration failed")
+ }
+
+ design$prob<-design$prob/g
+
+ caldata <- list(qr=tqr, w=g*whalf, stage=0, index=NULL)
+
+ class(caldata) <- c("greg_calibration","gen_raking")
+
+ design$postStrata <- c(design$postStrata, list(caldata))
+ design$call <- sys.call(-1)
+
+ design
+}
+
+
+calibrate.svyrep.design<-function(design, formula, population,compress=NA,
+ aggregate.index=NULL, variance=NULL,
+ bounds=c(-Inf,Inf), calfun=c("linear","raking","logit"),
+ maxit=50, epsilon=1e-7, verbose=FALSE,force=FALSE, trim=NULL,
+ ...){
+
+ if(is.list(formula) && is.list(population)){
+ ## inputs as marginal totals, as in rake()
+ population<-margins2totals(formula,population)
+ formula<-as.formula(paste("~",paste(sapply(formula,function(f) paste(all.vars(f),collapse="*")),collapse="+")))
+ if (verbose){
+ print(formula)
+ print(population)
+ }
+ }
+
+ if (is.character(calfun)) calfun<-match.arg(calfun)
+ if (length(epsilon)!=1 && length(epsilon)!=length(population))
+ stop("'epsilon' must be a scalar or of the same length as 'population'")
+
+  if (is.character(calfun) && calfun=="linear" && all(bounds==c(-Inf,Inf))){
+ ## old code is better for ill-conditioned linear calibration
+ rval<-regcalibrate(design,formula,population, compress=compress,
+ aggregate.index=aggregate.index,
+ lambda=variance,...)
+ rval$call<-sys.call(-1)
+ return(rval)
+ }
+
+ mf<-model.frame(formula, design$variables)
+ mm<-model.matrix(formula, mf)
+ ww<-design$pweights
+
+ repwt<-as.matrix(design$repweights)
+ if (!design$combined.weights)
+ repwt<-repwt*design$pweights
+
+ if (inherits(aggregate.index,"formula")){
+ if (length(aggregate.index)!=2)
+ stop("aggregate.index must be a one-sided formula")
+ aggregate.index<-model.frame(aggregate.index, design$variables)
+ if (NCOL(aggregate.index)>1)
+ stop("aggregate.index must specify a single variable")
+ aggregate.index<-aggregate.index[[1]]
+ }
+
+ if (!is.null(aggregate.index)){
+ if (sqrt(max(ave(ww,aggregate.index,FUN=var),na.rm=TRUE))>1e-2*mean(ww))
+ warning("Sampling weights are not constant within clusters defined by aggregate.index")
+ mm<-apply(mm,2,function(mx) ave(mx,aggregate.index))
+ ww<-ave(ww,aggregate.index)
+ repwt<-apply(repwt,2,function(wx) ave(wx, aggregate.index))
+ }
+ whalf<-sqrt(ww)
+
+ sample.total<-colSums(mm*ww)
+
+ if(any(sample.total==0)){
+    ## drop columns where both the sample and population totals are zero
+ zz<-(population==0) & (apply(mm,2,function(x) all(x==0)))
+ mm<-mm[,!zz]
+ population<-population[!zz]
+ sample.total<-sample.total[!zz]
+ if (length(epsilon)>1) epsilon <- epsilon[!zz]
+ }
+
+
+
+ if (length(sample.total)!=length(population)){
+ print(sample.total)
+ print(population)
+ stop("Population and sample totals are not the same length.")
+ }
+ if (!is.null(names(population))){
+ if (!all(names(sample.total) %in% names(population)))
+ warning("Sample and population totals have different names.")
+ else if (!all(names(sample.total) == names(population))){
+ warning("Sample and population totals reordered to make names agree: check results.")
+ population <- population[match(names(sample.total), names(population))]
+ }
+ }
+
+ if(is.character(calfun))
+ calfun<-switch(calfun, linear=cal.linear, raking=cal.raking, logit=cal.logit)
+ else if (!inherits(calfun,"calfun"))
+ stop("'calfun' must be a string or a 'calfun' object")
+ gtotal <- grake(mm,ww,calfun,bounds=bounds,population=population,
+ verbose=verbose, epsilon=epsilon, maxit=maxit)
+
+ if(!is.null(trim)) {
+ gnew<-pmax(trim[1], pmin(gtotal, trim[2]))
+ outside<-gtotal<trim[1] | gtotal>trim[2]
+ if (any(outside)){
+ trimmings<-(gtotal-gnew)*ww
+ gnew[!outside]<-gnew[!outside]+sum(trimmings)/sum(ww[!outside])
+ gtotal<-gnew
+ attr(gtotal,"failed")<-NULL
+ message(paste(sum(outside),"weights were trimmed"))
+ }
+ }
+ if (!force && !is.null(attr(gtotal,"failed"))) stop("Calibration failed")
+
+ design$pweights<-design$pweights*gtotal
+
+ for(i in 1:NCOL(repwt)){
+ wwi<-repwt[,i]
+ if(verbose) cat("replicate = ",i,"\n")
+ g<-grake(mm, wwi, calfun, eta=rep(0,NCOL(mm)), bounds=bounds, population=population,
+ epsilon=epsilon, verbose=verbose, maxit=maxit)
+
+ if(length(trim)==2){
+ outside<-(g<trim[1]) | (g>trim[2])
+ if (any(outside)) {
+ gnew<-pmax(trim[1],pmin(g,trim[2]))
+ trimmings<-(g-gnew)*wwi
+ gnew[!outside]<-gnew[!outside]+sum(trimmings)/sum(wwi[!outside])
+ g<-gnew
+ }}
+
+ repwt[,i]<-as.vector(design$repweights[,i])*g
+ }
+
+ if (!design$combined.weights)
+ repwt<-repwt/gtotal
+
+  if (isTRUE(compress) ||
+      (is.na(compress) && inherits(design$repweights,"repweights_compressed"))){
+ repwt<-compressWeights(repwt)
+ }
+
+ design$repweights<-repwt
+ design$call<-sys.call(-1)
+
+ design
+}
+
+cal.linear<-make.calfun(function(u,bounds) pmin(pmax(u+1,bounds[1]),bounds[2])-1,
+ function(u, bounds) as.numeric(u<bounds[2]-1 & u>bounds[1]-1),
+ "linear calibration")
+cal.raking<-make.calfun(function(u,bounds) pmin(pmax(exp(u),bounds[1]),bounds[2])-1,
+ function(u, bounds) ifelse(u<bounds[2]-1 & u>bounds[1]-1,exp(u),0),
+ "raking")
+cal.logit<-make.calfun(
+ function(u,bounds) {
+ if (any(!is.finite(bounds))) stop("Logit calibration requires finite bounds")
+ L <- bounds[1]
+ U <- bounds[2]
+ A <- (U-L)/((U-1)*(1-L))
+ eAu <- exp(A*u)
+ ( L*(U-1) + U*(1-L)*eAu)/(U-1+(1-L)*eAu)-1
+ },
+ function(u,bounds) {
+ L <- bounds[1]
+ U <- bounds[2]
+ A <- (U-L)/((U-1)*(1-L))
+ eAu <- exp(A*u)
+ U*(1-L)*eAu*A/(U-1+(1-L)*eAu)-( (L*(U-1)+U*(1-L)*eAu)*( (1-L)*eAu*A ) )/(U-1+(1-L)*eAu)^2
+ },
+ "logit calibration"
+ )
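+
+## Illustration only, not part of the package: make.calfun() builds a
+## calibration metric from F-1 and its derivative, both functions of
+## u = x'eta. A Hellinger-distance metric, for example, can be sketched as:
+if(FALSE){
+  hellinger <- make.calfun(Fm1=function(u, bounds) ((1-u/2)^-2)-1,
+                           dF=function(u, bounds) (1-u/2)^-3,
+                           name="hellinger distance")
+}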
+
+grake<-function(mm,ww,calfun,eta=rep(0,NCOL(mm)),bounds,population,epsilon, verbose,maxit){
+
+ sample.total<-colSums(mm*ww)
+ if(!inherits(calfun,"calfun")) stop("'calfun' must be of class 'calfun'")
+
+ Fm1<-calfun$Fm1
+ dF<-calfun$dF
+
+ xeta<-drop(mm%*%eta)
+ g<-1+Fm1(xeta, bounds)
+ deriv <- dF(xeta, bounds)
+ iter<-1
+
+ ## pre-scaling for people starting with no weights
+ SOMETHRESHOLD<-20
+ scales<-population/sample.total
+ if (min(scales)> SOMETHRESHOLD){
+ scale<-mean(scales)
+ ww<-ww*scale
+ sample.total<-sample.total*scale
+ if(verbose) message(paste("Sampling weights rescaled by",signif(scale,3)))
+ if (any(is.finite(bounds))) warning(paste("Bounds were set but will be interpreted after rescaling by",signif(scale,3)))
+ } else scale<-NULL
+
+ repeat({
+ Tmat<-crossprod(mm*ww*deriv, mm)
+
+ misfit<-(population-sample.total-colSums(mm*ww*Fm1(xeta, bounds)))
+ deta<-MASS::ginv(Tmat, tol=256*.Machine$double.eps)%*%misfit
+ eta<-eta+deta
+
+ xeta<- drop(mm%*%eta)
+ g<-1+Fm1(xeta, bounds)
+ deriv <- dF(xeta, bounds)
+ while(iter<maxit && any(!is.finite(g),!is.finite(deriv))){
+ iter<-iter+1
+ deta<-deta/2
+ eta<-eta-deta
+ xeta<- drop(mm%*%eta)
+ g<-1+Fm1(xeta, bounds)
+ deriv <- dF(xeta, bounds)
+ if(verbose) print("Step halving")
+ }
+ misfit<-(population-sample.total-colSums(mm*ww*Fm1(xeta, bounds)))
+
+ if (verbose)
+ print(misfit)
+
+ if (all(abs(misfit)/(1+abs(population))<epsilon)) break
+
+ iter <- iter+1
+ if (iter>maxit) {
+ achieved<-max((abs(misfit)/(1+abs(population))))
+ warning("Failed to converge: eps=",achieved," in ",iter," iterations")
+ attr(g,"failed")<-achieved
+ break;
+ }
+ })
+
+ if (!is.null(scale)) g<-g*scale
+ attr(g,"eta")<-eta
+ g
+}
+
+
+trimWeights<-function(design, upper=Inf,lower=-Inf, ...){
+ UseMethod("trimWeights")
+}
+
+
+trimWeights.survey.design2<-function(design, upper=Inf, lower= -Inf, strict=FALSE,...){
+ pw<-weights(design,"sampling")
+ outside<-pw<lower | pw>upper
+ if (!any(outside)) return(design)
+ pwnew<-pmax(lower,pmin(pw, upper))
+ trimmings<-pw-pwnew
+ pwnew[!outside]<-pwnew[!outside]+sum(trimmings)/sum(!outside)
+ design$prob<-1/pwnew
+ design$call<-sys.call()
+ design$call[[1]]<-as.name(.Generic)
+ if (strict) ## ensure that the trimmings don't push anything outside the limits
+ trimWeights(design, upper,lower, strict=TRUE)
+ else
+ design
+}
+
+trimWeights.svyrep.design<-function(design, upper=Inf, lower= -Inf, compress=FALSE,...){
+ pw<-weights(design,"sampling")
+ outside<-pw<lower | pw>upper
+ if (any(outside)) {
+ pwnew<-pmax(lower,pmin(pw, upper))
+ trimmings<-pw-pwnew
+ pwnew[!outside]<-pwnew[!outside]+sum(trimmings)/sum(!outside)
+          design$pweights<-pwnew
+ }
+ rw<-weights(design, "analysis")
+ outside<-rw<lower | rw>upper
+ if (any(outside)) {
+ rwnew<-pmax(lower,pmin(rw, upper))
+ trimmings<-rw-rwnew
+          rwnew<-rwnew+t(t(!outside)*(colSums(trimmings)/colSums(!outside)))
+ if (compress)
+ design$repweights<-compressWeights(rwnew)
+ else
+ design$repweights<-rwnew
+ design$combined.weights<-TRUE
+ }
+
+ design
+}
+
+
+margins2totals<-function(formulas, totals){
+ totals<-mapply(onemargin2totals,formulas,totals,SIMPLIFY=FALSE)
+ totaln<-do.call(c,totals)
+ totalorder<-do.call(c,lapply(totals,function(x) attr(x,"order")))
+ totaln<-totaln[order(totalorder)]
+
+ totaln[!duplicated(names(totaln))]
+ }
+
+
+onemargin2totals<-function(formula,total){
+ if (is.table(total)) total<-as.data.frame(total)
+ if (!is.data.frame(total) && is.vector(total) && (length(formula[[2]])==1)){
+ ## just a vector
+ total<-as.table(total)
+    d<-dimnames(total)
+    names(d)<-deparse(formula[[2]])
+    dimnames(total)<-d ## write the variable name back so model.frame() can find it
+    total<-as.data.frame(total)
+ }
+ if (!is.data.frame(total)) stop("incorrect format for population totals")
+
+ newformula<-as.formula(paste("Freq",paste(all.vars(formula),collapse="*"),sep="~"))
+ mf<-model.frame(newformula,as.data.frame(total))
+ mm<-model.matrix(newformula,mf)
+ intorder<-c(1,attr(terms(newformula),"order")[attr(mm,"assign")])
+ rval<-colSums(mf$Freq*mm)
+ attr(rval,"order")<-intorder
+ rval
+}
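+
+## Usage sketch (illustration only, not part of the package): the margin
+## machinery above lets calibrate() accept rake()-style input, a list of
+## margin formulas with a matching list of population margins.
+if(FALSE){
+  data(api)
+  dclus1 <- svydesign(id=~dnum, weights=~pw, fpc=~fpc, data=apiclus1)
+  pop.stype <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
+  cal <- calibrate(dclus1, formula=list(~stype), population=list(pop.stype),
+                   calfun="raking")
+  svymean(~api00, cal)
+}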
diff --git a/R/greg.R b/R/greg.R
new file mode 100644
index 0000000..755e034
--- /dev/null
+++ b/R/greg.R
@@ -0,0 +1,223 @@
+
+regcalibrate<-function(design, ...) UseMethod("regcalibrate")
+
+is.calibrated<-function(design){ !is.null(design$postStrata)}
+
+##
+## unbounded linear calibration using qr decomposition: less sensitive to
+## collinearity than Deville & Sarndal's Newton algorithm.
+##
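+## With t_pop the population totals and t_sam = colSums(x*w), the linear
+## g-weights below are g = 1 + x' T^{-1} (t_pop - t_sam)/sigma2 with
+## T = sum_i w_i x_i x_i'/sigma2_i, so that sum(w*g*x) = t_pop exactly;
+## the QR decomposition is kept in the postStrata object for later
+## variance calculations.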
+regcalibrate.survey.design2<-function(design, formula, population,
+ stage=NULL, lambda=NULL, aggregate.stage=NULL,...){
+
+ if (is.null(stage))
+ stage<-if (is.list(population)) 1 else 0
+
+ if (!is.null(aggregate.stage)){
+ aggindex<-design$cluster[[aggregate.stage]]
+ }
+
+ if(stage==0){
+ ## calibration to population totals
+ mm<-model.matrix(formula, model.frame(formula, model.frame(design)))
+ ww<-weights(design)
+ if (is.null(lambda))
+ sigma2<-rep(1,nrow(mm))
+ else
+ sigma2<-drop(mm%*%lambda)
+
+ if (!is.null(aggregate.stage)){
+ mm<-apply(mm,2,function(mx) ave(mx,aggindex))
+ ww<-ave(ww,aggindex)
+ sigma2<-ave(sigma2,aggindex)
+ }
+ whalf<-sqrt(ww)
+ sample.total<-colSums(mm*ww)
+
+ if(any(sample.total==0)){
+      ## drop columns where both the sample and population totals are zero
+ zz<-(population==0) & (apply(mm,2,function(x) all(x==0)))
+ mm<-mm[,!zz]
+ population<-population[!zz]
+ sample.total<-sample.total[!zz]
+ }
+
+
+ if (length(sample.total)!=length(population))
+ stop("Population and sample totals are not the same length.")
+
+ if (!is.null(names(population)) && any(names(sample.total)!=names(population)))
+ warning("Sample and population totals have different names.")
+
+ tqr<-qr(mm*whalf/sqrt(sigma2))
+
+ ## not needed
+ ##if (is.null(lambda) && !all(abs(qr.resid(tqr,whalf*sigma2)/sigma2) <1e-5))
+ ## warning("Calibration models with constant variance must have an intercept")
+
+ g<-rep(1,NROW(mm))
+
+ Tmat<-crossprod(mm*whalf/sqrt(sigma2))
+
+ tT<-solve(Tmat,population-sample.total)
+
+ g<-drop(1+mm%*%tT/sigma2)
+
+
+ design$prob<-design$prob/g
+
+ caldata<- list(qr=tqr, w=g*whalf*sqrt(sigma2), stage=0, index=NULL)
+
+ } else {
+ ## Calibration within clusters (Sarndal's Case C)
+ if (stage>NCOL(design$cluster))
+ stop("This design does not have stage",stage)
+
+ if (!is.null(aggregate.stage)){
+ stop("aggregate= not implemented for calibration within clusters")
+ }
+
+ if (!all(length(population[[1]])==sapply(population,length)))
+ stop("Population totals are not all the same length")
+
+ clusters<-unique(design$cluster[,stage])
+ nc<-length(clusters)
+
+ caldata<-list(qr=vector("list",nc), w=vector("list",nc),
+ stage=stage,index=as.character(clusters))
+
+ mm<-model.matrix(formula, model.frame(formula, model.frame(design)))
+
+ if (is.null(lambda))
+ sigma2<-rep(1,nrow(mm))
+ else
+ sigma2<-drop(mm%*%lambda)
+
+ if(NCOL(mm)!=length(population[[1]]))
+ stop("Population and sample totals are not the same length.")
+
+ if (any(colnames(mm)!=names(population[[1]])))
+ warning("Sample and population totals have different names.")
+
+ stageweights<-1/apply(design$allprob[,1:stage,drop=FALSE],1,prod)
+ if (any(duplicated(design$cluster[!duplicated(stageweights),stage])))
+ stop("Weights at stage", stage, "vary within sampling units")
+
+ cwhalf<-sqrt(weights(design)/stageweights)
+ dwhalf<-sqrt(weights(design))
+ tqr<-qr(mm)
+
+ ## not needed
+ ## if (is.null(lambda) && !all(abs(qr.resid(tqr,sigma2)) <1e-3))
+ ## stop("Calibration models with constant variance must have an intercept")
+
+ for (i in 1:length(clusters)){
+ cluster<-clusters[[i]]
+ these<-which(cluster == as.character(design$cluster[,stage]))
+ mmi<-mm[these,,drop=FALSE]
+ sample.total<-colSums(mmi*cwhalf[these]*cwhalf[these])
+
+ if(any(sample.total==0)){
+        ## drop columns where both the sample and population totals are zero
+ zz<-(population[[i]]==0) & (apply(mmi,2,function(x) all(x==0)))
+ mmi<-mmi[,!zz,drop=FALSE]
+ population[[i]]<-population[[i]][!zz]
+ sample.total<-sample.total[!zz]
+ }
+
+ tqr<-qr(mmi*cwhalf[these]/sqrt(sigma2[these]))
+ Tmat<-crossprod(mmi*cwhalf[these]/sqrt(sigma2[these]))
+ tT<-solve(Tmat,population[[i]]-sample.total)
+ g<-drop(1+mmi%*%tT/sigma2[these])
+ design$prob[these]<-design$prob[these]/g
+ caldata$qr[[i]]<-tqr
+ caldata$w[[i]]<-g*stageweights[these]*sqrt(sigma2[these])*cwhalf[these]^2
+ }
+ }
+ class(caldata)<-"greg_calibration"
+
+ design$postStrata<-c(design$postStrata, list(caldata))
+ design$call<-sys.call(-1)
+
+ design
+}
+
+
+regcalibrate.svyrep.design<-function(design, formula, population,compress=NA,lambda=NULL,
+ aggregate.index=NULL,...){
+ mf<-model.frame(formula, design$variables)
+ mm<-model.matrix(formula, mf)
+ ww<-design$pweights
+ if (is.null(lambda))
+ sigma2<-rep(1,nrow(mm))
+ else
+ sigma2<-drop(mm%*%lambda)
+
+ repwt<-as.matrix(design$repweights)
+ if (!design$combined.weights)
+ repwt<-repwt*design$pweights
+
+ if (inherits(aggregate.index,"formula")){
+ if (length(aggregate.index)!=2)
+ stop("aggregate.index must be a one-sided formula")
+ aggregate.index<-model.frame(aggregate.index, design$variables)
+ if (NCOL(aggregate.index)>1)
+ stop("aggregate.index must specify a single variable")
+ aggregate.index<-aggregate.index[[1]]
+ }
+
+ if (!is.null(aggregate.index)){
+ if (sqrt(max(ave(ww,aggregate.index,FUN=var),na.rm=TRUE))>1e-2*mean(ww))
+ warning("Sampling weights are not constant within clusters defined by aggregate.index")
+ mm<-apply(mm,2,function(mx) ave(mx,aggregate.index))
+ ww<-ave(ww,aggregate.index)
+ sigma2<-ave(sigma2,aggregate.index)
+ repwt<-apply(repwt,2,function(wx) ave(wx, aggregate.index))
+ }
+ whalf<-sqrt(ww)
+
+ sample.total<-colSums(mm*ww)
+
+ if(any(sample.total==0)){
+    ## drop columns where both the sample and population totals are zero
+ zz<-(population==0) & (apply(mm,2,function(x) all(x==0)))
+ mm<-mm[,!zz]
+ population<-population[!zz]
+ sample.total<-sample.total[!zz]
+ }
+
+ if (length(sample.total)!=length(population))
+ stop("Population and sample totals are not the same length.")
+ if (!is.null(names(population)) && any(names(sample.total)!=names(population)))
+ warning("Sample and population totals have different names.")
+
+ Tmat<-crossprod(mm*whalf/sqrt(sigma2))
+
+ tT<-solve(Tmat,population-sample.total)
+
+ gtotal<-drop(1+mm%*%tT/sigma2)
+ design$pweights<-design$pweights*gtotal
+
+ for(i in 1:NCOL(repwt)){
+ whalf<-sqrt(repwt[,i])
+ Tmat<-crossprod(mm*whalf/sqrt(sigma2))
+ sample.total<-colSums(mm*whalf*whalf)
+ g<-drop(1+mm%*%solve(Tmat,population-sample.total)/sigma2)
+ repwt[,i]<-as.vector(design$repweights[,i])*g
+ }
+
+ if (!design$combined.weights)
+ repwt<-repwt/gtotal
+
+  if (isTRUE(compress) ||
+      (is.na(compress) && inherits(design$repweights,"repweights_compressed"))){
+ repwt<-compressWeights(repwt)
+ }
+
+ design$repweights<-repwt
+ design$call<-sys.call(-1)
+ design$degf<-NULL
+ design$degf<-degf(design)
+
+ design
+}
diff --git a/R/ht.R b/R/ht.R
new file mode 100644
index 0000000..abe9039
--- /dev/null
+++ b/R/ht.R
@@ -0,0 +1,35 @@
+
+
+htvar.list<-function(xcheck, Dcheck){
+ rval<-sapply(Dcheck, function(stagei)
+ {htvar.matrix(rowsum(xcheck,stagei$id),stagei$dcheck)})
+ rval
+}
+
+## used in twophase2var()
+htvar.matrix<-function(xcheck, Dcheck){
+ if (is.null(dim(xcheck)))
+ xcheck<-as.matrix(xcheck)
+ rval<-apply(xcheck,2, function(xicheck)
+ apply(xcheck,2, function(xjcheck)
+ as.matrix(Matrix::crossprod(xicheck, Dcheck%*%xjcheck))
+ ))
+ if(is.null(dim(rval))) dim(rval)<-c(1,1)
+ rval
+}
+
+## used in ppsvar, twophase2var
+ygvar.matrix<-function(xcheck,Dcheck){
+ ht<-htvar.matrix(xcheck,Dcheck)
+ if (is.null(dim(xcheck))){
+ corr <- sum(Dcheck%*%(xcheck*xcheck))
+ } else {
+ corr <- apply(xcheck,2, function(xicheck)
+ apply(xcheck,2, function(xjcheck)
+ sum(Dcheck%*%(xicheck*xjcheck))
+ ))
+ }
+  ht - corr
+}
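+
+## Note: Dcheck appears to hold the "checked" joint-inclusion terms
+## (pi_ij - pi_i pi_j)/(pi_ij pi_i pi_j), so htvar.matrix() is the
+## Horvitz-Thompson variance form x' D x and ygvar.matrix() the
+## Yates-Grundy form, x' D x - sum_ij D_ij x_j^2.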
+
+
diff --git a/R/loglin.R b/R/loglin.R
new file mode 100644
index 0000000..9cc4b65
--- /dev/null
+++ b/R/loglin.R
@@ -0,0 +1,195 @@
+svyloglin<-function(formula,design,...) UseMethod("svyloglin",design)
+
+withOptions<-function(optlist,expr){
+ oldopt<-options(optlist)
+ on.exit(options(oldopt))
+ expr<-substitute(expr)
+ eval.parent(expr)
+ }
+
+tr<-function(m)sum(diag(m))
+tr2<-function(m) sum(m*m)
+
+
+svyloglin.survey.design<-function(formula,design,...){
+ if (length(formula)!=2) stop("needs a one-sided formula")
+ mdata<-model.frame(design)[,all.vars(formula)]
+ mf<-model.frame(formula,mdata,na.action=na.pass)
+ n<-as.numeric(nrow(mf))
+ hatp<-svymean(~I(do.call(interaction,mf)),design,na.rm=TRUE)
+ dat<-do.call(expand.grid,lapply(mdata,function(x) sort(unique(x))))
+ dat<-as.data.frame(lapply(dat,as.factor))
+ dat$y<-coef(hatp)*n
+ ff<-update(formula, y~.)
+ m1<-withOptions(list(contrasts=c("contr.sum","contr.poly")),
+ glm(ff, data=dat,family=quasipoisson)
+ )
+ P1<-(diag(fitted(m1)/n)-tcrossprod(fitted(m1)/n))/n
+ V<-vcov(hatp)
+
+ XX<-model.matrix(m1)[,-1,drop=FALSE]
+ XX<-sweep(XX,2,colMeans(XX))
+ Vtheta<-solve(t(XX)%*%P1%*%XX)%*%(t(XX)%*%V%*%XX)%*%solve(t(XX)%*%P1%*%XX)/(n*n)
+
+ rval<-list(model=m1, var=Vtheta, prob.table=hatp,df.null=degf(design),n=n)
+ call<-sys.call()
+ call[[1]]<-as.name(.Generic)
+ rval$call<-call
+ class(rval)<-"svyloglin"
+ rval
+}
+
+print.svyloglin<-function(x,...) {cat("Loglinear model: ");print(x$call)}
+
+coef.svyloglin<-function(object,...,intercept=FALSE){
+ if (intercept)
+ coef(object$model)
+ else
+ coef(object$model)[-1]
+}
+vcov.svyloglin<-function(object,...) object$var
+deviance.svyloglin<-function(object,...) deviance(object$model)
+degf.svyloglin<-function(design,...) length(design$prob.table)-length(coef(design))-1
+terms.svyloglin<-function(x,...) terms(x$model,...)
+model.matrix.svyloglin<-function(object,...) model.matrix(object$model,...)
+
+update.svyloglin<-function(object, formula,...){
+ n<-object$n
+  model<-withOptions(list(contrasts=c("contr.sum","contr.poly")),
+ update(object$model, formula,data=object$model$model))
+ P1<-(diag(fitted(model)/n)-tcrossprod(fitted(model)/n))/n
+ V<-vcov(object$prob.table)
+
+ XX<-model.matrix(model)[,-1,drop=FALSE]
+ XX<-sweep(XX,2,colMeans(XX))
+ A<-solve(t(XX)%*%P1%*%XX)
+ B<-t(XX)%*%V%*%XX
+ Vtheta<-A%*%B%*%A/(n*n)
+
+ rval<-list(model=model, var=Vtheta, prob.table=object$prob.table,
+ df.null=object$df.null,n=n)
+ call<-sys.call()
+ call[[1]]<-as.name(.Generic)
+ rval$call<-call
+ class(rval)<-"svyloglin"
+ rval
+}
+
+anova.svyloglin<-function(object,object1,...,integrate=FALSE){
+ if(length(coef(object1))<length(coef(object))) {
+ tmp<-object1
+ object1<-object
+ object<-tmp
+ }
+ n<-object$n
+ m0<-object$model
+ m1<-object1$model
+ dfnull<-object$df.null
+ pi1<-fitted(m1)/n
+ pi0<-fitted(m0)/n
+ X1<-model.matrix(m0)[,-1,drop=FALSE]
+ X2<-model.matrix(m1)[,-1,drop=FALSE]
+ X1<-sweep(X1,2,colMeans(X1))
+ X2<-sweep(X2,2,colMeans(X2))
+ P1<-(diag(fitted(m1)/n)-tcrossprod(fitted(m1)/n))/n
+ P0<-(diag(fitted(m0)/n)-tcrossprod(fitted(m0)/n))/n
+ Psat<-(diag(coef(object$prob.table))-tcrossprod(coef(object$prob.table)))/n
+
+  if (!isTRUE(all.equal(object$prob.table,object1$prob.table)))
+ stop("models must be fitted to the same data.")
+ V<-vcov(object$prob.table)
+
+ wX2sing<-X2-X1%*%solve(t(X1)%*%P1%*%X1,t(X1)%*%P1%*%X2)
+ qwX2<-qr(round(wX2sing,11))
+ wX2<-wX2sing[,qwX2$pivot[1:qwX2$rank],drop=FALSE]
+ Delta<-solve(t(wX2)%*%Psat%*%wX2,t(wX2)%*%V%*%wX2)
+
+ an<-anova(m0,m1)
+ dev<-an$Deviance[2]
+ if (integrate){
+ pdev<-pchisqsum(dev,rep(1,ncol(wX2)), a=eigen(Delta,only.values=TRUE,symmetric=TRUE)$values,
+ lower.tail=FALSE,method="integration")
+ } else pdev<-NA
+ pdev1<-pchisq(dev*ncol(wX2)/tr(Delta),df=ncol(wX2),lower.tail=FALSE)
+ pdev2a<-pf(dev/tr(Delta), tr(Delta)^2/tr2(Delta),dfnull*tr(Delta)^2/tr2(Delta),
+ lower.tail=FALSE)
+ pdevsad<-pchisqsum(dev,rep(1,ncol(wX2)), a=eigen(Delta,only.values=TRUE)$values,
+ lower.tail=FALSE,method="saddlepoint")
+
+ pearson<-n*sum( (pi1-pi0)^2/pi0 )
+ if (integrate){
+ pearsonp<-pchisqsum(pearson, rep(1,ncol(wX2)), a=eigen(Delta,only.values=TRUE,symmetric=TRUE)$values,
+ lower.tail=FALSE,method="integration")
+ } else pearsonp<-NA
+ prs1<-pchisq(pearson*ncol(wX2)/tr(Delta),df=ncol(wX2),lower.tail=FALSE)
+ prs2<-pchisq(pearson*ncol(wX2)/tr(Delta),df=tr(Delta)^2/tr2(Delta),lower.tail=FALSE)
+ prs2a<-pf(pearson/tr(Delta), tr(Delta)^2/tr2(Delta),dfnull*tr(Delta)^2/tr2(Delta),
+ lower.tail=FALSE)
+ pchisqsad<-pchisqsum(pearson, rep(1,ncol(wX2)), a=eigen(Delta,only.values=TRUE,symmetric=TRUE)$values,
+ lower.tail=FALSE,method="saddlepoint")
+
+ rval<-list(an, dev=list(dev=dev, p=c(pdev,pdev1,pdev2a,pdevsad)),
+ score=list(chisq=pearson,p=c(pearsonp,prs1,prs2a,pchisqsad)),
+ integrate=integrate,a=eigen(Delta,only.values=TRUE,symmetric=TRUE)$values,p=ncol(wX2))
+ class(rval)<-"anova.svyloglin"
+ rval
+}
+
+print.anova.svyloglin<-function(x,pval=c("F","saddlepoint","lincom","chisq"),...){
+ cat(attr(x[[1]],"heading"),"\n")
+ pval<-match.arg(pval)
+ if (pval=="lincom" && !x$integrate){
+ x$dev$p[1]<-pchisqsum(x$dev$dev, rep(1,x$p), a=x$a,
+ lower.tail=FALSE,method="integration")
+ x$score$p[1]<-pchisqsum(x$score$chisq, rep(1,x$p), a=x$a,
+ lower.tail=FALSE,method="integration")
+ }
+ cat("Deviance=",x$dev$dev,"p=",
+ switch(pval,lincom=x$dev$p[1],
+ saddlepoint=x$dev$p[4],
+ chisq=x$dev$p[2],
+ F=x$dev$p[3]),"\n")
+ cat("Score=",x$score$chisq,"p=",
+ switch(pval,lincom=x$score$p[1],
+ saddlepoint=x$score$p[4],
+ chisq=x$score$p[2],
+ F=x$score$p[3]),"\n")
+ invisible(x)
+}
+
+summary.svyloglin<-function(object,...){
+ rval<-list(ll=object)
+ class(rval)<-"summary.svyloglin"
+ rval
+ }
+
+print.summary.svyloglin<-function(x,...){
+ print(x$ll)
+ print(cbind(coef=coef(x$ll),
+ se=SE(x$ll),
+ p=2*pnorm(abs(coef(x$ll)/SE(x$ll)),lower.tail=FALSE)))
+ invisible(x)
+ }
+
+svyloglin.svyrep.design<-svyloglin.survey.design
+
+svyloglin.DBIsvydesign<-function (formula, design, ...)
+{
+ design$variables <- dropFactor(getvars(formula, design$db$connection,
+ design$db$tablename, updates = design$updates), weights(design))
+ class(design)<-c("survey.design2","survey.design")
+ rval<-svyloglin(formula,design)
+ rval$call<-sys.call()
+ rval$call[[1]]<-as.name(.Generic)
+ rval
+}
+svyloglin.ODBCsvydesign<-function (formula, design, ...)
+{
+ design$variables <- dropFactor(getvars(formula, design$db$connection,
+ design$db$tablename, updates = design$updates), weights(design))
+ class(design)<-c("survey.design2","survey.design")
+ rval<-svyloglin(formula,design)
+ rval$call<-sys.call()
+ rval$call[[1]]<-as.name(.Generic)
+ rval
+}
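+
+## Usage sketch (illustration only, not part of the package):
+if(FALSE){
+  data(api)
+  dclus1 <- svydesign(id=~dnum, weights=~pw, fpc=~fpc, data=apiclus1)
+  m0 <- svyloglin(~stype+comp.imp, dclus1)  ## independence model
+  m1 <- update(m0, ~.+stype:comp.imp)       ## add the interaction
+  anova(m0, m1)  ## design-based working deviance and score tests
+}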
diff --git a/R/logrank.R b/R/logrank.R
new file mode 100644
index 0000000..20b37aa
--- /dev/null
+++ b/R/logrank.R
@@ -0,0 +1,212 @@
+svylogrank<-function(formula, design,rho=0,gamma=0,method=c("small","large","score"),...){
+ UseMethod("svylogrank",design)
+}
+
+print.svylogrank<-function(x,...){
+ m<-t(x)
+ rownames(m)=""
+ printCoefmat(m,has.Pvalue=TRUE,P.values=TRUE)
+ invisible(NULL)
+ }
+
+.logrank<-function(formula, design,rho=0,gamma=0){
+ nullformula<-update(formula,.~1)
+ S<-svykm(nullformula,design,se=FALSE)
+ epsilon<-min(diff(sort(unique(S$time))))/10
+ w<-approxfun(S$time+epsilon,S$surv^rho*(1-S$surv)^gamma,method="constant",rule=2)
+ environment(formula)<-environment()
+ coxmodel<-coxph(formula,data=model.frame(design), weights=weights(design,"sampling"),iter.max=0)
+ x<-model.matrix(coxmodel)
+
+ detail<-coxph.detail(coxmodel,riskmat=TRUE)
+ Y<-t(detail$riskmat)
+ dLambda<-detail$hazard
+ E<-as.matrix(detail$means)
+ N<-coxmodel$y[,"status"]
+
+ times<-coxmodel$y[,"time"]
+ U<-matrix(nrow=nrow(x),ncol=ncol(x))
+ index<-match(times[N==1],detail$time)
+ ZmEdN<- matrix(0,nrow=nrow(x),ncol=ncol(x))
+ ZmEdN[N==1,]<-x[N==1,,drop=FALSE]-E[index,]
+ for(p in 1:ncol(x)){
+ ZmE <- -outer(E[,p], x[,p], "-") ##times are rows, people are columns
+ U[,p]<- ZmEdN[,p]*w(times)- colSums(w(detail$time)*ZmE*dLambda*Y)
+ }
+ means <- svytotal(U,design)
+ zstat<-coef(means)/SE(means)
+ chisqstat<-coef(means)%*%solve(vcov(means),coef(means))
+
+ rval<-list(cbind(score=coef(means),se=SE(means),z=coef(means)/SE(means),p= 2*pnorm(-abs(coef(means)/SE(means)))),
+ c(chisq=chisqstat,p=pchisq(chisqstat,df=ncol(x),lower.tail=FALSE)))
+ class(rval)<-"svylogrank"
+ rval
+ }
+
+
+.biglogrank<-function(formula, design,rho=0,gamma=0){
+ nullformula<-update(formula,.~1)
+ S<-svykm(nullformula,design,se=FALSE)
+ epsilon<-min(diff(sort(unique(S$time))))/10
+ w<-approxfun(S$time+epsilon,S$surv^rho*(1-S$surv)^gamma,method="constant",rule=2)
+ environment(formula)<-environment()
+ coxmodel<-coxph(formula,data=model.frame(design), weights=weights(design,"sampling"),iter.max=0)
+ x<-model.matrix(coxmodel)
+
+ detail<-coxph.detail(coxmodel)
+
+ dLambda<-detail$hazard
+ E<-as.matrix(detail$means)
+ N<-coxmodel$y[,"status"]
+
+ times<-coxmodel$y[,"time"]
+ U<-matrix(nrow=nrow(x),ncol=ncol(x))
+ index<-match(times[N==1],detail$time)
+ ZmEdN<- matrix(0,nrow=nrow(x),ncol=ncol(x))
+ ZmEdN[N==1,]<-x[N==1,,drop=FALSE]-E[index,]
+ for(p in 1:ncol(x)){
+ U[,p]<- ZmEdN[,p]*w(times)
+ for (j in seq_along(detail$time)){
+ thistime<-detail$time[j]
+ ZmE <- x[,p]-E[j,p]
+ U[,p] <- U[,p] - w(thistime)*ZmE*dLambda[j]*(times>=thistime)
+ }
+ }
+ means <- svytotal(U,design)
+ zstat<-coef(means)/SE(means)
+ chisqstat<-coef(means)%*%solve(vcov(means),coef(means))
+
+ rval<-list(data.frame(score=coef(means),se=SE(means),z=coef(means)/SE(means),p= 2*pnorm(-abs(coef(means)/SE(means)))),
+ c(chisq=chisqstat,p=pchisq(chisqstat,df=ncol(x),lower.tail=FALSE)))
+ class(rval)<-"svylogrank"
+ rval
+
+ }
+
+svylogrank.survey.design2<-function(formula, design,rho=0,gamma=0,
+ method=c("small","large","score"),
+ ...){
+ method<-match.arg(method)
+ if (method=="small")
+ return(.logrank(formula,design, rho,gamma,...))
+ else if (method=="large")
+ return(.biglogrank(formula,design,rho,gamma,...))
+ if (rho!=0 || gamma!=0){
+ return(expandlogrank(formula,design,rho,gamma,...))
+ }
+
+ tms<-delete.response(terms(formula,specials="strata"))
+ findstrat<-untangle.specials(tms,"strata")
+ if(length(findstrat$terms))
+ tms<-tms[-findstrat$terms]
+ mf<-model.frame(tms,model.frame(design))
+ if(length(mf)>1)
+ stop("Only one grouping variable allowed")
+ if(!is.factor(mf[[1]]) && length(unique(mf[[1]]))>2)
+ stop("Grouping variable with more than 2 levels must be a factor")
+
+ b<-coef(svycoxph(formula,design,iter=1))
+ v<-vcov(svycoxph(formula,design,iter=0))
+ x2<-sum(b*solve(v,b))
+ rval<-c(z=b/sqrt(diag(v)), Chisq=x2, p=pchisq(x2,length(b),lower.tail=FALSE))
+ class(rval)<-"svylogrank"
+ rval
+ }
+
+svylogrank.twophase<-svylogrank.survey.design2
+svylogrank.twophase2<-svylogrank.survey.design2
+
+svylogrank.DBIsvydesign<-function (formula, design, ...)
+{
+ design$variables <- dropFactor(getvars(formula, design$db$connection,
+ design$db$tablename, updates = design$updates, subset = design$subset),
+ weights(design))
+ NextMethod("svylogrank", design)
+}
+
+svylogrank.ODBCsvydesign<-function (formula, design, ...)
+{
+ design$variables <- dropFactor(getvars(formula, design$db$connection,
+ design$db$tablename, updates = design$updates), weights(design))
+ NextMethod("svylogrank", design)
+}
+
+svylogrank.svyrep.design<-function(formula, design,rho=0,gamma=0,method=c("small","large","score"), ...){
+ method<-match.arg(method)
+ if (method=="small")
+ return(.logrank(formula,design, rho,gamma,...))
+ else if (method=="large")
+ return(.biglogrank(formula,design,rho,gamma,...))
+ if (rho!=0 || gamma!=0){
+ return(expandlogrank(formula,design,rho,gamma,...))
+ }
+ tms<-delete.response(terms(formula,specials="strata"))
+ findstrat<-untangle.specials(tms,"strata")
+ if(length(findstrat$terms))
+ tms<-tms[-findstrat$terms]
+ mf<-model.frame(tms,model.frame(design))
+ if(length(mf)>1)
+ stop("Only one grouping variable allowed")
+ if(!is.factor(mf[[1]]) && length(unique(mf[[1]]))>2)
+ stop("Grouping variable with more than 2 levels must be a factor")
+
+ rr<-withReplicates(design, function(w,df){
+ environment(formula)<-environment()
+ coef(coxph(formula,data=df,weights=w+1e-8,iter=1))
+ })
+
+ b<-unclass(rr)
+ attr(b,"var")<-NULL
+ v<-attr(rr,"var")
+ x2<-sum(b*solve(v,b))
+ rval<- c(z=b/sqrt(diag(as.matrix(v))), Chisq=x2, p=pchisq(x2,length(b),lower.tail=FALSE))
+ class(rval)<-"svylogrank"
+ rval
+ }
+
+
+
+expandlogrank<-function(formula, design, rho=0, gamma=0){
+ nullformula<-update(formula,.~1)
+ S<-svykm(nullformula,design,se=FALSE)
+ epsilon<-min(diff(sort(unique(S$time))))/10
+ w<-approxfun(S$time+epsilon,S$surv^rho*(1-S$surv)^gamma,method="constant",rule=2)
+ environment(formula)<-environment()
+ coxmodel<-coxph(formula,data=model.frame(design), weights=weights(design,"sampling"),iter.max=0)
+ mf<-model.frame(design)
+ detail<-coxph.detail(coxmodel)
+
+ if(attr(coxmodel$y,"type")=="right"){
+ mf$.time<-coxmodel$y[,"time"]
+ mf$.status<-coxmodel$y[,"status"]
+ mfsplit <- survSplit(mf, cut=detail$time, end=".time", event=".status", start=".start", id=".id", episode=".episode")
+ } else {
+ mf$.start<-coxmodel$y[,"start"]
+ mf$.time<-coxmodel$y[,"stop"]
+ mf$.status<-coxmodel$y[,"status"]
+ mfsplit <- survSplit(mf, cut=detail$time, end=".time", event=".status", start=".start", id=".id", episode=".episode")
+ }
+
+ formula[[2]]<-quote(Surv(.start,.time,.status))
+
+ mfsplit$.weights<-weights(design,"sampling")[match(mfsplit$.id, rownames(mf))]*w(mfsplit$.time)
+ expdesign<-svydesign(ids=eval(design$call$id), strata=eval(design$call$strata), data=mfsplit, weights=~.weights)
+ #svylogrank(formula,expdesign)
+ tms<-delete.response(terms(formula,specials="strata"))
+ findstrat<-untangle.specials(tms,"strata")
+ if(length(findstrat$terms))
+ tms<-tms[-findstrat$terms]
+ mf<-model.frame(tms,model.frame(expdesign))
+ if(length(mf)>1)
+ stop("Only one grouping variable allowed")
+ if(!is.factor(mf[[1]]) && length(unique(mf[[1]]))>2)
+ stop("Grouping variable with more than 2 levels must be a factor")
+
+ b<-coef(svycoxph(formula,expdesign,iter=1))
+ v<-vcov(svycoxph(formula,expdesign,iter=0))
+ x2<-sum(b*solve(v,b))
+ rval<-c(z=b/sqrt(diag(v)), Chisq=x2, p=pchisq(x2,length(b),lower.tail=FALSE))
+ class(rval)<-"svylogrank"
+ rval
+ }
+
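+## Usage sketch (illustration only, not part of the package): the 'pbc'
+## data from the survival package, treated as an unweighted simple random
+## sample purely for illustration.
+if(FALSE){
+  library(survival)
+  dpbc <- svydesign(id=~1, data=subset(pbc, !is.na(trt)))
+  svylogrank(Surv(time, status==2)~trt, design=dpbc)
+  svylogrank(Surv(time, status==2)~trt, design=dpbc, rho=1) ## G-rho family
+}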
diff --git a/R/margins.R b/R/margins.R
new file mode 100644
index 0000000..3beeeae
--- /dev/null
+++ b/R/margins.R
@@ -0,0 +1,186 @@
+
+marginpred<-function(model, adjustfor, predictat, ...) UseMethod("marginpred", model)
+
+##
+## Basic strategy: calibrate on ~model*adjustfor, to set interactions to zero
+##
+
+marginpred.svycoxph<-function(model, adjustfor, predictat, se=FALSE, ...){
+
+ if(NROW(predictat)==0) return(NULL)
+
+ design<-model$survey.design
+ ##if (inherits(design,"twophase")) stop("Two-phase designs not yet supported")
+ if (!is.null(model$na.action)) design<-design[-model$na.action,]
+
+ modelformula<-formula(model)
+ calformula<-eval(bquote( ~(.(modelformula[[3]]))*(.(adjustfor))))
+
+ adjmf<-model.frame(terms(adjustfor), model.frame(design))
+ adjmm<-model.matrix(terms(adjustfor), adjmf)
+ modelmm<-model.matrix(model)[,-1,drop=FALSE]
+ modelmm <- sweep(modelmm,2,model$means)
+
+ if (qr(modelmm)$rank<ncol(modelmm))
+ stop("model is singular")
+
+ qrmain<-qr(cbind(modelmm,adjmm))
+ if(qrmain$rank<(ncol(modelmm)+ncol(adjmm))){
+ if (qrmain$rank==ncol(modelmm))
+ stop("adjustment variables are all in model")
+ adjmm<-adjmm[, (qrmain$pivot[-(1:ncol(modelmm))]-ncol(modelmm))[1:(qrmain$rank-ncol(modelmm))],drop=FALSE]
+ }
+
+ mm<-matrix(ncol=ncol(modelmm)*ncol(adjmm),nrow=nrow(modelmm))
+ for(i in 1:ncol(modelmm)){
+ mm[,(i-1)*ncol(adjmm)+(1:ncol(adjmm))]<-adjmm*modelmm[,i]
+ }
+
+ pop<-as.vector(outer(colSums(adjmm*weights(design)),
+ colSums(modelmm*weights(design))/sum(weights(design))
+ )
+ )
+
+ g<-grake(mm,weights(design), calfun=cal.raking, bounds=c(0,Inf),
+ population=pop, epsilon=1e-4,maxit=100,verbose=FALSE)
+
+ if ( !is.null(attr(g,"failed"))) stop("Calibration failed")
+ design$prob<-design$prob/g
+
+ whalf<-sqrt(weights(design))
+ tqr<-qr(mm*whalf)
+ caldata <- list(qr=tqr, w=g*whalf, stage=0, index=NULL)
+
+ class(caldata) <- c("greg_calibration","gen_raking")
+
+ design$postStrata <- c(design$postStrata, list(caldata))
+ design$call <- sys.call(-1)
+
+ model<-eval(bquote(svycoxph(.(formula(model)), design=design)))
+ predict(model, newdata=predictat, se=se,type="curve",...)
+
+}
+
+
+marginpred.svykmlist<-function(model, adjustfor, predictat, se=FALSE, ...){
+
+ design<-eval.parent(attr(model, "call")$design)
+ formula<-formula(model)
+ if(!is.null(drop<-attr(model,"na.action")))
+ design<-design[-drop,]
+
+ if(NROW(predictat)==0) return(NULL)
+
+ ##if (inherits(design,"twophase")) stop("Two-phase designs not yet supported")
+
+ modelformula<-formula(model)
+ calformula<-eval(bquote( ~(.(modelformula[[3]]))*(.(adjustfor))))
+
+ adjmf <- model.frame(terms(adjustfor), model.frame(design),na.action=na.fail)
+ adjmm <- model.matrix(terms(adjustfor), adjmf)
+ adjmm <- sweep(adjmm, 2, colSums(adjmm*weights(design))/sum(weights(design)))
+ modelmf <- model.frame(terms(formula), model.frame(design),na.action=na.fail)
+ modelmm <- model.matrix(terms(formula), modelmf)[,-1,drop=FALSE]
+ modelmm <- sweep(modelmm, 2, colSums(modelmm*weights(design))/sum(weights(design)))
+
+ if (qr(modelmm)$rank<ncol(modelmm))
+ stop("model is singular")
+
+ qrmain<-qr(cbind(modelmm, adjmm))
+ if(qrmain$rank<(ncol(modelmm)+ncol(adjmm))){
+ if (qrmain$rank==ncol(modelmm))
+ stop("adjustment variables are all in model")
+ adjmm<-adjmm[, (qrmain$pivot[-(1:ncol(modelmm))]-ncol(modelmm))[1:(qrmain$rank-ncol(modelmm))],drop=FALSE]
+ }
+
+ mm<-matrix(ncol=ncol(modelmm)*ncol(adjmm),nrow=nrow(modelmm))
+ for(i in 1:ncol(modelmm)){
+ mm[,(i-1)*ncol(adjmm)+(1:ncol(adjmm))]<-adjmm*modelmm[,i]
+ }
+
+ pop<-as.vector(outer(colSums(adjmm*weights(design)),
+ colSums(modelmm*weights(design))/sum(weights(design))
+ )
+ )
+
+ g<-grake(mm,weights(design), calfun=cal.raking, bounds=c(0, Inf),
+ population=pop, epsilon=1e-4, maxit=100, verbose=FALSE)
+
+ if ( !is.null(attr(g,"failed"))) stop("Calibration failed")
+ design$prob<-design$prob/g
+
+ whalf<-sqrt(weights(design))
+ tqr<-qr(mm*whalf)
+ caldata <- list(qr=tqr, w=g*whalf, stage=0, index=NULL)
+
+ class(caldata) <- c("greg_calibration","gen_raking")
+
+ design$postStrata <- c(design$postStrata, list(caldata))
+ design$call <- sys.call(-1)
+
+ eval(bquote(svykm(.(formula), design=design,se=.(se))))
+}
+
+
+marginpred.svyglm<-function(model, adjustfor, predictat, ...){
+
+ design<-model$survey.design
+ formula<-formula(model)
+ if(!is.null(drop<-attr(model,"na.action")))
+ design<-design[-drop,]
+
+ if(NROW(predictat)==0) return(NULL)
+
+ ##if (inherits(design,"twophase")) stop("Two-phase designs not yet supported")
+
+ modelformula<-formula(model)
+ calformula<-eval(bquote( ~(.(modelformula[[3]]))*(.(adjustfor))))
+
+ adjmf <- model.frame(terms(adjustfor), model.frame(design),na.action=na.fail)
+ adjmm <- model.matrix(terms(adjustfor), adjmf)
+ adjmm <- sweep(adjmm, 2, colSums(adjmm*weights(design))/sum(weights(design)))
+ modelmm <- model.matrix(model)[,-1,drop=FALSE]
+ modelmm <- sweep(modelmm, 2, colSums(modelmm*weights(design))/sum(weights(design)))
+
+ if (qr(modelmm)$rank<ncol(modelmm))
+ stop("model is singular")
+
+ qrmain<-qr(cbind(modelmm, adjmm))
+ if(qrmain$rank<(ncol(modelmm)+ncol(adjmm))){
+ if (qrmain$rank==ncol(modelmm))
+ stop("adjustment variables are all in model")
+ adjmm<-adjmm[, (qrmain$pivot[-(1:ncol(modelmm))]-ncol(modelmm))[1:(qrmain$rank-ncol(modelmm))],drop=FALSE]
+ }
+
+ mm<-matrix(ncol=ncol(modelmm)*ncol(adjmm),nrow=nrow(modelmm))
+ for(i in 1:ncol(modelmm)){
+ mm[,(i-1)*ncol(adjmm)+(1:ncol(adjmm))]<-adjmm*modelmm[,i]
+ }
+
+ pop<-as.vector(outer(colSums(adjmm*weights(design)),
+ colSums(modelmm*weights(design))/sum(weights(design))
+ )
+ )
+
+ g<-grake(mm,weights(design), calfun=cal.raking, bounds=c(0,Inf),
+ population=pop, epsilon=1e-4, maxit=100, verbose=FALSE)
+
+ if ( !is.null(attr(g,"failed"))) stop("Calibration failed")
+ .design<-design
+ .design$prob<-.design$prob/g
+
+ whalf<-sqrt(weights(.design))
+ tqr<-qr(mm*whalf)
+ caldata <- list(qr=tqr, w=g*whalf, stage=0, index=NULL)
+
+ class(caldata) <- c("greg_calibration","gen_raking")
+
+ design$postStrata <- c(design$postStrata, list(caldata))
+ design$call <- sys.call(-1)
+
+ call<-model$call
+ call$design<-quote(.design)
+ newmodel<-eval(call)
+ predict(newmodel,newdata=predictat, ...)
+}
+
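+## Usage sketch (illustration only, not part of the package): predictions
+## standardised to the whole sample's distribution of the adjustment
+## variables, by raking the model-by-adjustment interactions to zero.
+if(FALSE){
+  data(api)
+  dstrat <- svydesign(id=~1, strata=~stype, weights=~pw, fpc=~fpc,
+                      data=apistrat)
+  m <- svyglm(api00~ell, design=dstrat)
+  marginpred(m, adjustfor=~mobility, predictat=data.frame(ell=c(10,20)))
+}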
diff --git a/R/mrb.R b/R/mrb.R
new file mode 100644
index 0000000..a734329
--- /dev/null
+++ b/R/mrb.R
@@ -0,0 +1,52 @@
+## Rescaled multistage bootstrap
+## Preston http://www.statcan.gc.ca/pub/12-001-x/2009002/article/11044-eng.pdf
+##
+
+mrbweights<-function(clusters,stratas,fpcs, replicates=50, multicore=getOption("survey.multicore")){
+ nstages<-NCOL(clusters)
+ if (is.null(fpcs$popsize)){
+ warning("Design is sampled with replacement: only first stage used")
+ fpcs$popsize<-matrix(Inf, ncol=1,nrow=NROW(clusters))
+ nstages<-1
+ }
+
+  if (multicore && !requireNamespace("parallel", quietly=TRUE))
+ multicore<-FALSE
+ do.it<-if(multicore) parallel::mclapply else lapply
+
+ weightlist<-do.it(1:replicates, function(k){
+ weights<-matrix(1,nrow=NROW(clusters),ncol=nstages)
+ kept<-rep(TRUE, NROW(clusters))
+ cumffs<-rep(1,NROW(clusters))
+ for(i in 1:nstages){
+ ustrata<-unique(stratas[,i])
+ nstrata<-length(ustrata)
+ for(j in 1:nstrata){
+ thisstratum<-stratas[,i]==ustrata[j]
+ su <- unique(clusters[thisstratum & kept,i] )
+ n <-length(su)
+ nstar<-floor(n/2)
+ cumff<-cumffs[thisstratum][1]
+ if (nstar==0) {
+ wstar<-0
+ keep<- rep(FALSE,sum(thisstratum))
+ } else {
+ fpc<- fpcs$sampsize[thisstratum,i][1]/fpcs$popsize[thisstratum,i][1]
+ lambda<-sqrt(cumff*nstar*(1-fpc)/(n-nstar))
+ keep<-clusters[thisstratum,i] %in% sample(su,nstar)
+ wstar<-(-lambda+lambda*(n/nstar)*keep)
+ }
+ weights[thisstratum, i]<-wstar*weights[thisstratum, i]
+ if (nstar>0 & i<nstages){
+ weights[thisstratum & kept,(i+1):nstages]<-weights[thisstratum & kept,(i+1):nstages]*sqrt(n/nstar)*keep
+ }
+ kept[thisstratum] <- kept[thisstratum] & keep
+ cumffs[thisstratum]<-cumffs[thisstratum] * fpc
+ }
+
+ }
+ rowSums(weights)
+ })
+ list(repweights=(1+do.call(cbind,weightlist)), scale=1, rscales=rep(1/(replicates-1),replicates))
+}
+
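+## Usage sketch (illustration only, not part of the package): mrbweights()
+## is normally reached through as.svrepdesign(type="mrbbootstrap") on a
+## multistage design with fpc information (the MU284 data ship with the
+## package).
+if(FALSE){
+  data(mu284)
+  d284 <- svydesign(id=~id1+id2, fpc=~n1+n2, data=mu284)
+  rep284 <- as.svrepdesign(d284, type="mrbbootstrap", replicates=100)
+  svytotal(~y1, rep284)
+}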
diff --git a/R/mse.R b/R/mse.R
new file mode 100644
index 0000000..c35b22e
--- /dev/null
+++ b/R/mse.R
@@ -0,0 +1,12 @@
+
+mse<-function(repstat, design){
+ v<-attr(repstat,"var")
+ center<-attr(v,"means")
+ if ((length(v)!=length(center)^2) && (length(v)==length(center))){
+ attr(repstat,"var")<-vcov(repstat)+(center-coef(repstat))^2*sum(design$rscales)*design$scale
+ } else {
+ attr(repstat,"var")<-as.matrix(vcov(repstat)+outer((center-coef(repstat)))*sum(design$rscales)*design$scale)
+ }
+ repstat
+ }
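+
+## Note: this turns the centred replicate variance into an MSE-style
+## estimate, vcov(est) + scale*sum(rscales)*(replicate mean - est)^2,
+## measuring deviations around the full-sample estimate rather than
+## around the mean of the replicates.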
+
diff --git a/R/multistage.R b/R/multistage.R
new file mode 100755
index 0000000..2b36b74
--- /dev/null
+++ b/R/multistage.R
@@ -0,0 +1,821 @@
+##
+## Recursive estimation of linearisation variances
+## in multistage samples.
+##
+
+svydesign<-function(ids, probs = NULL, strata = NULL, variables = NULL,
+ fpc = NULL, data=NULL, nest = FALSE, check.strata = !nest,
+ weights = NULL,pps=FALSE,...){
+ UseMethod("svydesign", data)
+ }
+
+svydesign.default<-function(ids,probs=NULL,strata=NULL,variables=NULL, fpc=NULL,
+ data=NULL, nest=FALSE, check.strata=!nest,weights=NULL,pps=FALSE,
+ variance=c("HT","YG"),...){
+ variance<-match.arg(variance)
+ if(is.character(pps)){
+ a<-match.arg(pps,c("brewer","overton","other"))
+ if (!(pps %in% c("brewer","other")))
+ return(pps_design(ids=ids,probs=probs, strata=strata,variables=variables, fpc=fpc,
+ data=data,method=a,call=sys.call(-1),variance=variance,...))
+ } else if (!is.logical(pps)){
+ return(pps_design(ids=ids,probs=probs, strata=strata,variables=variables, fpc=fpc,
+ data=data,method=pps,call=sys.call(-1),variance=variance,...))
+ }
+
+ if (!is.character(pps) || pps!="other"){
+ if (variance!="HT")
+ stop("Only variance='HT' supported for this design")
+ }
+
+ ## less memory-hungry version for sparse tables
+ interaction<-function (..., drop = TRUE) {
+ args <- list(...)
+ narg <- length(args)
+ if (narg == 1 && is.list(args[[1]])) {
+ args <- args[[1]]
+ narg <- length(args)
+ }
+
+ ls<-sapply(args,function(a) length(levels(a)))
+ ans<-do.call("paste",c(lapply(args,as.character),sep="."))
+ ans<-factor(ans)
+ return(ans)
+
+ }
+
+ na.failsafe<-function(message="missing values in object"){
+ function(object,...){
+ if (NCOL(object)==0)
+ object
+ else {
+ ok <- complete.cases(object)
+ if (all(ok))
+ object
+ else stop(message)
+ }
+ }
+ }
+
+ na.id<-na.failsafe("missing values in `id'")
+ if(inherits(ids,"formula")) {
+ mf<-substitute(model.frame(ids,data=data, na.action=na.id))
+ ids<-eval.parent(mf)
+ if (ncol(ids)==0) ## formula was ~1
+ ids<-data.frame(id=1:nrow(ids))
+ } else{
+ if (is.null(ids))
+ stop("Must provide ids= argument")
+ else
+ ids<-na.id(data.frame(ids))
+ }
+
+ na.prob<-na.failsafe("missing values in `prob'")
+ if(inherits(probs,"formula")){
+ mf<-substitute(model.frame(probs,data=data,na.action=na.prob))
+ probs<-eval.parent(mf)
+ }
+
+ na.weight<-na.failsafe("missing values in `weights'")
+ if(inherits(weights,"formula")){
+ mf<-substitute(model.frame(weights,data=data,na.action=na.weight))
+ weights<-eval.parent(mf)
+ } else if (!is.null(weights))
+ weights<-na.weight(data.frame(weights))
+ if(!is.null(weights)){
+ if (!is.null(probs))
+ stop("Can't specify both sampling weights and probabilities")
+ else
+ probs<-as.data.frame(1/as.matrix(weights))
+ }
+
+
+
+ na.strata<-na.failsafe("missing values in `strata'")
+ if (!is.null(strata)){
+ if(inherits(strata,"formula")){
+ mf<-substitute(model.frame(strata,data=data, na.action=na.strata))
+ strata<-eval.parent(mf)
+ }
+ if (!is.list(strata))
+ strata<-data.frame(strata=strata)
+ has.strata<-TRUE
+ } else {
+ has.strata <-FALSE
+ strata<-na.strata(as.data.frame(matrix(1, nrow=NROW(ids), ncol=NCOL(ids))))
+ }
+
+
+ if (inherits(variables,"formula")){
+ mf<-substitute(model.frame(variables,data=data,na.action=na.pass))
+ variables <- eval.parent(mf)
+ } else if (is.null(variables)){
+ variables<-data
+ } else
+ variables<-do.call("data.frame",variables)
+
+
+ na.fpc<-na.failsafe("missing values in `fpc'")
+ if (inherits(fpc,"formula")){
+ mf<-substitute(model.frame(fpc,data=data,na.action=na.fpc))
+ fpc<-eval.parent(mf)
+ }
+
+ ## check for only one PSU: probably a typo
+ if ((length(unique(ids[,1]))==1) && !(nest && has.strata)){
+ stop("Design has only one primary sampling unit")
+ }
+
+ ## force subclusters nested in clusters
+ if (NCOL(ids)>1){
+ N<-ncol(ids)
+ for(i in 2:N){
+ ids[,i]<-do.call("interaction", ids[,1:i,drop=FALSE])
+ }
+ }
+ ## force clusters nested in strata
+ if (nest && has.strata && NCOL(ids)){
+ N<-NCOL(ids)
+ NS<-NCOL(strata)
+ for(i in 1:N)
+ ids[,i]<-do.call("interaction",
+ c(strata[,1:min(i,NS),drop=FALSE], ids[,i,drop=FALSE]))
+ }
+
+ ## check if clusters nested in strata
+ if (check.strata && nest)
+ warning("No point in check.strata=TRUE if nest=TRUE")
+ if(check.strata && !is.null(strata) && NCOL(ids)){
+ sc<-(rowSums(table(ids[,1],strata[,1])>0))
+ if(any(sc>1)) stop("Clusters not nested in strata at top level; you may want nest=TRUE.")
+ }
+
+ ## force substrata nested in clusters
+ N<-ncol(ids)
+ NS<-ncol(strata)
+ if (N>1){
+ for(i in 2:N)
+ strata[,i]<-interaction(strata[,min(i,NS)], ids[,i-1])
+ }
+
+ ## PPS: valid choices currently are FALSE and "brewer"
+ if (is.logical(pps) && pps) stop("'pps' must be FALSE or a character string")
+ if (is.character(pps)) {
+ pps<-TRUE
+ }
+
+ ## Finite population correction: specified per observation
+ ## Also incorporates design sample sizes formerly in nPSU
+
+ if (!is.null(fpc) && !is.numeric(fpc) && !is.data.frame(fpc))
+ stop("fpc must be a matrix or dataframe or NULL")
+
+ fpc<-as.fpc(fpc,strata, ids, pps=pps)
+
+ ## if FPC specified, but no weights, use it for weights
+ if (is.null(probs) && is.null(weights)){
+ if (is.null(fpc$popsize)){
+ if (missing(probs) && missing(weights))
+ warning("No weights or probabilities supplied, assuming equal probability")
+ probs<-rep(1,nrow(ids))
+ } else {
+ probs<-1/weights(fpc, final=FALSE)
+ }
+ }
+
+
+ if (is.numeric(probs) && length(probs)==1)
+ probs<-rep(probs, NROW(variables))
+
+ if (length(probs)==0) probs<-rep(1,NROW(variables))
+
+ if (NCOL(probs)==1) probs<-data.frame(probs)
+
+ rval<-list(cluster=ids)
+ rval$strata<-strata
+ rval$has.strata<-has.strata
+ rval$prob<- apply(probs,1,prod)
+ rval$allprob<-probs
+ rval$call<-match.call()
+ rval$variables<-variables
+ rval$fpc<-fpc
+ rval$call<-sys.call(-1)
+ rval$pps<-pps
+ class(rval)<-c("survey.design2","survey.design")
+ rval
+}
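+
+## Commented usage sketch (not part of the upstream source): a two-stage
+## cluster sample from the package's data(api); the column names are the
+## ones documented on the api help page.
+## data(api)
+## dclus2 <- svydesign(ids=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)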
+
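+## onestrat/onestage compute the usual stratified between-cluster variance:
+## for stratum h with n_h sampled PSUs and correction f_h = (N_h-n_h)/N_h,
+## the contribution is f_h * n_h/(n_h-1) * sum_i (t_hi - tbar_h)^2 (outer
+## products in the multivariate case), where t_hi are weighted PSU totals.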
+onestrat<-function(x,cluster,nPSU,fpc, lonely.psu,stratum=NULL,stage=1,cal=cal){
+
+ if (is.null(fpc))
+ f<-rep(1,NROW(x))
+ else{
+ f<-ifelse(fpc==Inf, 1, (fpc-nPSU)/fpc)
+ }
+
+ if (nPSU>1)
+ scale<-f*nPSU/(nPSU-1)
+ else
+ scale<-f
+ if (all(f<0.0000001))## self-representing stratum
+ return(matrix(0,NCOL(x),NCOL(x)))
+
+ scale<-scale[!duplicated(cluster)]
+
+ x<-rowsum(x,cluster)
+ nsubset<-nrow(x)
+
+ if (nsubset<nPSU) {
+ ##can't be PPS, so scale must be a constant
+ x<-rbind(x,matrix(0,ncol=ncol(x),nrow=nPSU-nrow(x)))
+ scale<-rep(scale[1],NROW(x))
+ }
+ if (lonely.psu!="adjust" || nsubset>1 ||
+ (nPSU>1 && !getOption("survey.adjust.domain.lonely")))
+ x<-sweep(x, 2, colMeans(x), "-")
+
+ if (nsubset==1 && nPSU>1 && getOption("survey.adjust.domain.lonely")){
+ warning("Stratum (",stratum,") has only one PSU at stage ",stage)
+ if (lonely.psu=="average" && getOption("survey.adjust.domain.lonely"))
+ scale<-NA
+ }
+
+ if (nPSU>1){
+ return(crossprod(x*sqrt(scale)))
+ } else {
+ rval<-switch(lonely.psu,
+ certainty=crossprod(x*sqrt(scale)),
+ remove=crossprod(x*sqrt(scale)),
+ adjust=crossprod(x*sqrt(scale)),
+ average=NA*crossprod(x),
+ fail= stop("Stratum (",stratum,") has only one PSU at stage ",stage),
+ stop("Can't handle lonely.psu=",lonely.psu)
+ )
+ rval
+ }
+}
+
+
+onestage<-function(x, strata, clusters, nPSU, fpc, lonely.psu=getOption("survey.lonely.psu"),stage=0, cal){
+ stratvars<- tapply(1:NROW(x), list(factor(strata)), function(index){
+ onestrat(x[index,,drop=FALSE], clusters[index],
+ nPSU[index][1], fpc[index], ##changed from fpc[index][1], to allow pps(brewer)
+ lonely.psu=lonely.psu,stratum=strata[index][1], stage=stage,cal=cal)
+ })
+ p<-NCOL(x)
+ nstrat<-length(unique(strata))
+ nokstrat<-sum(sapply(stratvars,function(m) !any(is.na(m))))
+ apply(array(unlist(stratvars),c(p,p,length(stratvars))),1:2,sum,na.rm=TRUE)*nstrat/nokstrat
+}
+
+
+svyrecvar<-function(x, clusters, stratas, fpcs, postStrata=NULL,
+ lonely.psu=getOption("survey.lonely.psu"),
+ one.stage=getOption("survey.ultimate.cluster")){
+
+ x<-as.matrix(x)
+ cal<-NULL
+
+ ## Remove post-stratum means, which may cut across clusters
+ ## Also center the data using any "g-calibration" models
+ if(!is.null(postStrata)){
+ for (psvar in postStrata){
+ if (inherits(psvar, "greg_calibration")) {
+ if (psvar$stage==0){
+ ## G-calibration at population level
+ x<-qr.resid(psvar$qr,x/psvar$w)*psvar$w
+ } else {
+ ## G-calibration within clusters
+ cal<-c(cal, list(psvar))
+ }
+ } else if (inherits(psvar, "raking")){
+ ## raking by iterative proportional fitting
+ for(iterations in 1:10){
+ for(margin in psvar){
+ psw<-attr(margin, "weights")
+ x<- x - psw*apply(x/psw, 2, ave, margin)
+ }
+ }
+ } else {
+ ## ordinary post-stratification
+ psw<-attr(psvar, "weights")
+ oldw<-attr(psvar, "oldweights")
+ if (is.null(oldw)) oldw<-rep(1,length(psw))
+ zeroes<-which(psw==0 & oldw==0)
+ if (length(zeroes)) psw[zeroes]=1
+ psvar<-as.factor(psvar)
+ psmeans<-rowsum(x*oldw/psw,psvar,reorder=TRUE)/as.vector(by(oldw,psvar,sum))
+ x<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw
+ }
+ }
+ }
+
+ multistage(x, clusters,stratas,fpcs$sampsize, fpcs$popsize,
+ lonely.psu=getOption("survey.lonely.psu"),
+ one.stage=one.stage,stage=1,cal=cal)
+}
+
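+## multistage() recurses over the sampling stages: the stage-one
+## between-PSU variance plus, within each first-stage cluster, the
+## later-stage variance scaled by that cluster's sampling fraction n_1/N_1.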
+multistage<-function(x, clusters, stratas, nPSUs, fpcs,
+ lonely.psu=getOption("survey.lonely.psu"),
+ one.stage=FALSE,stage,cal){
+
+ n<-NROW(x)
+
+
+ v <- onestage(x,stratas[,1], clusters[,1], nPSUs[,1],
+ fpcs[,1], lonely.psu=lonely.psu,stage=stage,cal=cal)
+
+ if (one.stage!=TRUE && !is.null(fpcs) && NCOL(clusters)>1) {
+ v.sub<-by(1:n, list(as.numeric(clusters[,1])), function(index){
+ ## residuals for G-calibration using population information
+ ## only on clusters at this stage.
+ for(cali in cal){
+ if (cali$stage != stage)
+ next
+ j<-match(clusters[index,1],cali$index)
+ if (length(unique(j))!=1)
+ stop("Internal problem in g-calibration data: stage",stage,
+ ", cluster", j)
+ j<-j[[1]]
+ x[index,]<-qr.resid(cali$qr[[j]], x[index,,drop=FALSE]/cali$w[[j]])*cali$w[[j]]
+ }
+ multistage(x[index,,drop=FALSE], clusters[index,-1,drop=FALSE],
+ stratas[index,-1,drop=FALSE], nPSUs[index,-1,drop=FALSE],
+ fpcs[index,-1,drop=FALSE],
+ lonely.psu=lonely.psu,one.stage=one.stage-1,
+ stage=stage+1,cal=cal)*nPSUs[index[1],1]/fpcs[index[1],1]
+ })
+
+ for(i in 1:length(v.sub))
+ v<-v+v.sub[[i]]
+ }
+ dimnames(v)<-list(colnames(x),colnames(x))
+ v
+}
+
+
+## fpc not given are zero: full sampling.
+as.fpc<-function(df,strata,ids,pps=FALSE){
+
+ count<-function(x) length(unique(x))
+
+ sampsize<-matrix(ncol=ncol(ids),nrow=nrow(ids))
+ for(i in 1:ncol(ids))
+ split(sampsize[,i],strata[,i])<-lapply(split(ids[,i],strata[,i]),count)
+
+ if (is.null(df)){
+ ## No fpc
+ rval<-list(popsize=NULL, sampsize=sampsize)
+ class(rval)<-"survey_fpc"
+ return(rval)
+ }
+
+ fpc<-as.matrix(df)
+ if (xor(ispopsize<-any(df>1), all(df>=1))){
+ big<-which(fpc>=1,arr.ind=TRUE)
+ small<-which(fpc<1,arr.ind=TRUE)
+ cat("record",big[1,1]," stage",big[1,2],": fpc=", fpc[big[1,,drop=FALSE]],"\n")
+ cat("record",small[1,1]," stage ",small[1,2],": fpc=", fpc[small[1,,drop=FALSE]],"\n")
+ stop("Must have all fpc>=1 or all fpc<=1")
+ }
+
+ if (ispopsize){
+ if(pps) stop("fpc must be specified as sampling fraction for PPS sampling")
+ popsize<-fpc
+ } else {
+ popsize<-sampsize/(fpc)
+ }
+ if (any(popsize<sampsize)){
+ toobig<-which(popsize<sampsize,arr.ind=TRUE)
+ cat("record",toobig[1,1],"stage",toobig[1,2],": popsize=",popsize[toobig[1,,drop=FALSE]],
+ " sampsize=", sampsize[toobig[1,,drop=FALSE]],"\n")
+ stop("FPC implies >100% sampling in some strata")
+ }
+ if (!ispopsize && any(is.finite(popsize) & (popsize>1e10))){
+ big<-which(popsize>1e10 & is.finite(popsize),arr.ind=TRUE)
+ warning("FPC implies population larger than ten billion (record",big[1,1]," stage ",big[1,2],")")
+ }
+ if(!pps){
+ ## check that fpc is constant within strata.
+ for(i in 1:ncol(popsize)){
+ diff<-by(popsize[,i], list(strata[,i]), count)
+ if (any(as.vector(diff)>1)){
+ j<-which(as.vector(diff)>1)[1]
+ warning("`fpc' varies within strata: stratum ",names(diff)[j], " at stage ",i)
+ }
+ }
+ } else{
+ ## check that fpc is constant within clusters
+ for(i in 1:ncol(popsize)){
+ diff<-by(popsize[,i], list(ids[,i]), count)
+ if (any(as.vector(diff)>1)){
+ j<-which(as.vector(diff)>1)[1]
+ warning("`fpc' varies within cluster: cluster ",names(diff)[j], " at stage ",i)
+ }
+ }
+ }
+
+
+ rval<-list(popsize=popsize, sampsize=sampsize)
+ class(rval)<-"survey_fpc"
+ rval
+}
+
+"weights.survey_fpc"<-function(object,final=TRUE,...){
+ if (is.null(object$popsize) || any(object$popsize>1e12))
+ stop("Weights not supplied and can't be computed from fpc.")
+ if (final) {
+ pop<-apply(object$popsize,1,prod)
+ samp<-apply(object$sampsize,1,prod)
+ pop/samp
+ } else {
+ object$popsize/object$sampsize
+ }
+}
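+
+## Commented sketch of the two accepted fpc codings (the data frame 'df'
+## and its columns are hypothetical):
+## svydesign(ids=~psu, strata=~str, fpc=~Nh, data=df)  # stratum population sizes, all >= 1
+## svydesign(ids=~psu, strata=~str, fpc=~fh, data=df)  # sampling fractions, all <= 1
+## as.fpc() stops if the two codings are mixed.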
+
+
+
+
+print.survey.design2<-function(x,varnames=FALSE,design.summaries=FALSE,...){
+ n<-NROW(x$cluster)
+ if (x$has.strata) cat("Stratified ")
+ un<-length(unique(x$cluster[,1]))
+ if(n==un){
+ cat("Independent Sampling design")
+ is.independent<-TRUE
+ if (is.null(x$fpc$popsize))
+ cat(" (with replacement)\n")
+ else cat("\n")
+ } else {
+ cat(NCOL(x$cluster),"- level Cluster Sampling design")
+ if (is.null(x$fpc$popsize))
+ cat(" (with replacement)\n")
+ else cat("\n")
+ nn<-lapply(x$cluster,function(i) length(unique(i)))
+ cat(paste("With (",paste(unlist(nn),collapse=", "),") clusters.\n",sep=""))
+ is.independent<-FALSE
+ }
+
+ print(x$call)
+ if (design.summaries){
+ cat("Probabilities:\n")
+ print(summary(x$prob))
+ if(x$has.strata){
+ if (NCOL(x$cluster)>1)
+ cat("First-level ")
+ cat("Stratum Sizes: \n")
+ oo<-order(unique(x$strata[,1]))
+ a<-rbind(obs=table(x$strata[,1]),
+ design.PSU=x$fpc$sampsize[!duplicated(x$strata[,1]),1][oo],
+ actual.PSU=table(x$strata[!duplicated(x$cluster[,1]),1]))
+ print(a)
+ }
+ if (!is.null(x$fpc$popsize)){
+ if (x$has.strata) {
+ cat("Population stratum sizes (PSUs): \n")
+ s<-!duplicated(x$strata[,1])
+ a<-x$fpc$popsize[s,1]
+ names(a)<-x$strata[s,1]
+ a<-a[order(names(a))]
+ print(a)
+ } else {
+ cat("Population size (PSUs):",x$fpc$popsize[1,1],"\n")
+ }
+ }
+ }
+ if (varnames){
+ cat("Data variables:\n")
+ print(colnames(x))
+ }
+ invisible(x)
+}
+
+
+summary.survey.design2<-function(object,...){
+ class(object)<-c("summary.survey.design2",class(object))
+ object
+}
+
+print.summary.survey.design2<-function(x,...){
+ y<-x
+ class(y)<-c("survey.design2",class(x))
+ print(y,varnames=TRUE,design.summaries=TRUE,...)
+}
+
+
+.svycheck<-function(object){
+ if (inherits(object,"survey.design") &&
+ !is.null(object$nPSU))
+ warning("This is an old-style design object. Please use as.svydesign2 to update it.")
+}
+
+as.svydesign2<-function(object){
+ if (inherits(object,"survey.design2"))
+ return(object)
+ if (!inherits(object,"survey.design"))
+ stop("This function is for updating old-style survey.design objects")
+
+
+ count<-function(x) length(unique(x))
+
+ strata<-data.frame(one=object$strata)
+ if ((nc<-ncol(object$cluster))>1){
+ for(i in 2:nc){
+ strata<-cbind(strata,object$cluster[,i-1])
+ }
+ }
+
+ sampsize<-matrix(ncol=nc,nrow=nrow(object$cluster))
+
+ sampsize[,1]<-object$nPSU[match(object$strata, names(object$nPSU))]
+ if (nc>1){
+ for(i in 2:nc){
+ split(sampsize[,i],strata[,i])<-lapply(split(object$cluster[,i],strata[,i]),count)
+ }
+ }
+
+ if (!is.null(object$fpc)){
+ popsize<-sampsize
+ popsize[,1]<-object$fpc$N[match(object$strata,object$fpc$strata)]
+ } else popsize<-NULL
+ if (nc>1 && !is.null(object$fpc)){
+ warning("Assuming complete sampling at stages 2 -",nc)
+ }
+
+ fpc<-list(popsize=popsize,sampsize=sampsize)
+ class(fpc)<-"survey_fpc"
+
+
+ object$fpc<-fpc
+ object$strata<-strata
+ object$nPSU<-NULL
+ class(object)<-c("survey.design2","survey.design")
+ object
+
+}
+
+is.pps<-function(x) if(is.null(x$pps)) FALSE else (x$pps!=FALSE)
+
+"[.survey.design2"<-function (x,i, ..., drop=TRUE){
+ if (!missing(i)){
+ if (is.calibrated(x) || is.pps(x) || !drop){
+ ## Set weights to zero: no memory saving possible
+ ## There should be an easier way to complement a subscript..
+ if (is.logical(i))
+ x$prob[!i]<-Inf
+ else if (is.numeric(i) && length(i))
+ x$prob[-i]<-Inf
+ else {
+ tmp<-x$prob[i,]
+ x$prob<-rep(Inf, length(x$prob))
+ x$prob[i,]<-tmp
+ }
+ index<-is.finite(x$prob)
+ psu<-!duplicated(x$cluster[index,1])
+ tt<-table(x$strata[index,1][psu])
+ if(any(tt==1) && getOption("survey.adjust.domain.lonely")){
+ warning(sum(tt==1)," strata have only one PSU in this subset.")
+ }
+ } else {
+ ## subset everything.
+ if (!is.null(x$variables)) ## phase 2 of twophase design
+ x$variables<-"[.data.frame"(x$variables,i,..1,drop=FALSE)
+ x$cluster<-x$cluster[i,,drop=FALSE]
+ x$prob<-x$prob[i]
+ x$allprob<-x$allprob[i,,drop=FALSE]
+ x$strata<-x$strata[i,,drop=FALSE]
+ x$fpc$sampsize<-x$fpc$sampsize[i,,drop=FALSE]
+ x$fpc$popsize<-x$fpc$popsize[i,,drop=FALSE]
+ }
+
+ } else {
+ if(!is.null(x$variables))
+ x$variables<-x$variables[,..1,drop=FALSE]
+ }
+
+ x
+}
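+
+## Note: for calibrated or PPS designs (and with drop=FALSE) subsetting
+## keeps every row and sets the excluded probabilities to Inf (weight
+## zero), preserving the post-stratification and covariance structure;
+## otherwise the rows are actually dropped to save memory.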
+
+svytotal.survey.design2<-function(x,design, na.rm=FALSE, deff=FALSE,...){
+
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,design$variables,na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ } else{
+ if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+ else {
+ if(is.data.frame(x) && any(sapply(x,is.factor))){
+ xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
+ cols<-sapply(xx,NCOL)
+ scols<-c(0,cumsum(cols))
+ cn<-character(sum(cols))
+ for(i in 1:length(xx))
+ cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-cn
+ }
+ }
+ }
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ if (length(nas)>length(design$prob))
+ x<-x[nas==0,,drop=FALSE]
+ else
+ x[nas>0,]<-0
+ }
+
+ N<-sum(1/design$prob)
+ total <- colSums(x/as.vector(design$prob),na.rm=na.rm)
+ class(total)<-"svystat"
+ attr(total, "var")<-v<-svyrecvar(x/design$prob,design$cluster,
+ design$strata, design$fpc,
+ postStrata=design$postStrata)
+ attr(total,"statistic")<-"total"
+
+ if (is.character(deff) || deff){
+ nobs<-sum(weights(design)!=0)
+ if (deff=="replace")
+ vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2/nobs
+ else
+ vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2*(N-nobs)/(N*nobs)
+ attr(total, "deff")<-v/vsrs
+ }
+
+
+ return(total)
+}
+
+
+svymean.survey.design2<-function(x,design, na.rm=FALSE,deff=FALSE,...){
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,design$variables,na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ }
+ else {
+ if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+ else if(is.data.frame(x) && any(sapply(x,is.factor))){
+ xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
+ cols<-sapply(xx,NCOL)
+ scols<-c(0,cumsum(cols))
+ cn<-character(sum(cols))
+ for(i in 1:length(xx))
+ cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-cn
+ }
+ }
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ if (length(nas)>length(design$prob))
+ x<-x[nas==0,,drop=FALSE]
+ else
+ x[nas>0,]<-0
+ }
+
+ pweights<-1/design$prob
+ psum<-sum(pweights)
+ average<-colSums(x*pweights/psum)
+ x<-sweep(x,2,average)
+ v<-svyrecvar(x*pweights/psum,design$cluster,design$strata, design$fpc,
+ postStrata=design$postStrata)
+ attr(average,"var")<-v
+ attr(average,"statistic")<-"mean"
+ class(average)<-"svystat"
+ if (is.character(deff) || deff){
+ nobs<-sum(weights(design)!=0)
+ if(deff=="replace"){
+ vsrs<-svyvar(x,design,na.rm=na.rm)/(nobs)
+ } else {
+ if(psum<nobs) {
+ vsrs<-NA*v
+ warning("Sample size greater than population size: are weights correctly scaled?")
+ } else{
+ vsrs<-svyvar(x,design,na.rm=na.rm)*(psum-nobs)/(psum*nobs)
+ }
+ }
+ attr(average, "deff")<-v/vsrs
+ }
+
+ return(average)
+}
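+
+## Commented usage sketch, assuming the dclus2 design built from data(api):
+## svymean(~api00, dclus2, deff=TRUE)
+## svytotal(~enroll, dclus2)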
+
+svyratio.survey.design2<-function(numerator=formula, denominator, design, separate=FALSE,na.rm=FALSE,formula,covmat=FALSE,deff=FALSE,...){
+
+ if (separate){
+ strats<-sort(unique(design$strata[,1]))
+ if (!design$has.strata)
+ warning("Separate and combined ratio estimators are the same for unstratified designs")
+ rval<-list(ratios=lapply(strats,
+ function(s) {
+ tmp<-svyratio(numerator, denominator,
+ subset(design, design$strata[,1] %in% s),
+ separate=FALSE,...)
+ attr(tmp,"call")<-bquote(Stratum==.(s))
+ tmp}))
+ names(rval$ratios)<-strats
+
+ class(rval)<-c("svyratio_separate")
+ rval$call<-sys.call()
+ rval$strata<-strats
+ return(rval)
+ }
+
+ if (inherits(numerator,"formula"))
+ numerator<-model.frame(numerator,design$variables,na.action=na.pass)
+ else if(typeof(numerator) %in% c("expression","symbol"))
+ numerator<-eval(numerator, design$variables)
+ if (inherits(denominator,"formula"))
+ denominator<-model.frame(denominator,design$variables,na.action=na.pass)
+ else if(typeof(denominator) %in% c("expression","symbol"))
+ denominator<-eval(denominator, design$variables)
+
+ numerator<-as.matrix(numerator)
+ denominator<-as.matrix(denominator)
+ nn<-NCOL(numerator)
+ nd<-NCOL(denominator)
+
+ all<-cbind(numerator,denominator)
+ nas<-!complete.cases(all)
+ if ((na.rm==TRUE) && any(nas)){
+ design<-design[!nas,]
+ if (NROW(design$cluster) == NROW(all)){
+ ## subset by zero weights
+ all[nas,]<-1
+ numerator[nas,]<-0
+ denominator[nas,]<-1
+ } else {
+ ## subset by actually dropping rows
+ all<-all[!nas,,drop=FALSE]
+ numerator<-numerator[!nas,,drop=FALSE]
+ denominator<-denominator[!nas,,drop=FALSE]
+ }
+ }
+ allstats<-svytotal(all,design)
+ rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/"))
+
+
+ vars<-matrix(ncol=nd,nrow=nn)
+
+ if (deff) deffs<-matrix(ncol=nd,nrow=nn)
+
+ for(i in 1:nn){
+ for(j in 1:nd){
+ r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob)
+ vars[i,j]<-svyrecvar(r*1/design$prob, design$cluster, design$strata, design$fpc,
+ postStrata=design$postStrata)
+ if (deff){
+ deffs[i,j]<-deff(svytotal(r,design,deff=TRUE))
+ }
+ }
+ }
+ if (covmat){
+ ii<-rep(1:nn,nd)
+ jj<-rep(1:nd,each=nn)
+ allr<-sweep(numerator[,ii]-t(as.vector(rval$ratio)*t(denominator[,jj,drop=FALSE])),
+ 2, colSums(denominator[,jj,drop=FALSE]/design$prob),"/")
+ vcovmat<-svyrecvar(allr*1/design$prob, design$cluster, design$strata, design$fpc,
+ postStrata=design$postStrata)
+ colnames(vcovmat)<-colnames(denominator)[ii]
+ rval$vcov<-vcovmat
+ }
+ colnames(vars)<-colnames(denominator)
+ rownames(vars)<-colnames(numerator)
+ rval$var<-vars
+ if (deff) attr(rval,"deff")<-deffs
+ attr(rval,"call")<-sys.call()
+ class(rval)<-"svyratio"
+ rval
+
+ }
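+
+## Commented usage sketch, again assuming the data(api) design:
+## svyratio(~api.stu, ~enroll, dclus2)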
diff --git a/R/multivariate.R b/R/multivariate.R
new file mode 100644
index 0000000..dac698b
--- /dev/null
+++ b/R/multivariate.R
@@ -0,0 +1,155 @@
+
+svyfactanal<-function(formula, design, factors,n=c("none", "sample","degf","effective","min.effective"),...){
+ v<-svyvar(formula,design)
+ n<-match.arg(n)
+ s2<-diag(v)
+ ses2<-diag(matrix(SE(v), length(s2), length(s2)))
+ neff<-2*(s2/ses2)^2
+ n<-switch(n, sample=nrow(design)-1, degf=degf(design), effective=1/mean(1/neff),min.effective=min(neff), none=NA)+1
+ f<-factanal(covmat=v, factors=factors, n.obs=n,...)
+ f$call<-sys.call()
+ f
+}
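+
+## Commented sketch (the variable choice is illustrative, from data(api)):
+## svyfactanal(~api99+api00+ell+meals, design=dclus2, factors=2, n="effective")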
+
+
+svyprcomp<-function (formula, design, center = TRUE, scale. = FALSE, tol = NULL, scores=FALSE,
+ ...)
+{
+ tms<-terms(formula)
+ attr(tms,"intercept")<-0
+ mf<-model.frame(formula,model.frame(design))
+ naa<-attr(mf,"na.action")
+ x <- model.matrix(tms,mf)
+ if(length(naa))
+ w<-weights(design,"sampling")[-naa]
+ else
+ w<-weights(design,"sampling")
+
+ x<-x*sqrt(w/mean(w))
+ x <- scale(x, center = center, scale = scale.)
+ cen <- attr(x, "scaled:center")
+ sc <- attr(x, "scaled:scale")
+ if (any(sc == 0))
+ stop("cannot rescale a constant/zero column to unit variance")
+ s <- svd(x, nu = 0)
+ s$d <- s$d/sqrt(max(1, nrow(x) - 1))
+ if (!is.null(tol)) {
+ rank <- sum(s$d > (s$d[1L] * tol))
+ if (rank < ncol(x)) {
+ s$v <- s$v[, 1L:rank, drop = FALSE]
+ s$d <- s$d[1L:rank]
+ }
+ }
+ dimnames(s$v) <- list(colnames(x), paste("PC", seq_len(ncol(s$v)),
+ sep = ""))
+ r <- list(sdev = s$d, rotation = s$v, center = if (is.null(cen)) FALSE else cen,
+ scale = if (is.null(sc)) FALSE else sc)
+ r$weights<-w/mean(w)
+
+ if (scores)
+ r$x <- (x %*% s$v)/sqrt(r$weights)
+
+ r$na.action<-naa
+ r$design<-design
+ class(r) <- c("svyprcomp","prcomp")
+ r
+}
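+
+## Commented sketch (illustrative variables from data(api)):
+## pc <- svyprcomp(~api99+api00+ell+hsg+meals+emer, design=dclus2,
+##                 scale.=TRUE, scores=TRUE)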
+
+
+biplot.svyprcomp<-function(x, cols=c("black","darkred"),xlabs=NULL,weight=c("transparent","scaled","none"),
+ max.alpha=0.5,max.cex=0.5,xlim=NULL,ylim=NULL,pc.biplot=FALSE,expand=1,xlab=NULL,ylab=NULL,
+ arrow.len=0.1,
+ ...){
+
+ if(is.null(xlabs)){
+ xlabs<-1:NROW(x$x)
+ } else {
+ if (inherits(xlabs,"formula")){
+ mf<-model.frame(xlabs,model.frame(x$design),na.action=na.pass)
+ if(length(x$na.action))
+ mf<-mf[-x$na.action,]
+ if(ncol(mf)>1) xlabs<-sapply(mf,paste,collapse=".") else xlabs<-as.character(mf[[1]])
+ }
+ }
+
+
+ scores<-x$x
+
+ lam <- x$sdev[1:2]
+ n <- NROW(scores)
+ lam <- lam * sqrt(n)
+
+ if (pc.biplot)
+ lam <- lam/sqrt(n)
+
+ xx<-t(t(scores[, 1:2])/lam)
+ yy<-t(t(x$rotation[,1:2]) * lam)
+
+
+ if (missing(xlabs)) {
+ xlabs <- dimnames(x)[[1L]]
+ if (is.null(xlabs))
+ xlabs <- 1L:n
+ }
+
+ xlabs <- as.character(xlabs)
+ dimnames(xx) <- list(xlabs, dimnames(xx)[[2L]])
+ ylabs <- dimnames(yy)[[1L]]
+
+ ylabs <- as.character(ylabs)
+ dimnames(yy) <- list(ylabs, dimnames(yy)[[2L]])
+
+
+ weight<-match.arg(weight)
+
+ w<-weights(x$design)
+ if (length(x$na.action)) w<-w[-x$na.action]
+
+ if (weight=="transparent"){
+ xcexs<-par("cex")*max.cex
+ rgbcol<-col2rgb(rep(cols[1],length=length(w)))
+ xcols<-rgb(rgbcol[1,],rgbcol[2,],rgbcol[3,],alpha=pmax(1,255*w*max.alpha/max(w)), maxColorValue=255)
+ } else if (weight=="scaled"){
+ xcexs<-par("cex")*pmax(0.2, max.cex*sqrt(w/max(w)))
+ rgbcol<-col2rgb(cols[1])
+ xcols<-rgb(rgbcol[1,],rgbcol[2,],rgbcol[3,],alpha=max.alpha*255, maxColorValue=255)
+ } else if (weight=="none"){
+ rgbcol<-col2rgb(cols[1])
+ xcols<-rgb(rgbcol[1,],rgbcol[2,],rgbcol[3,],alpha=max.alpha*255, maxColorValue=255)
+ xcexs<-par("cex")*max.cex
+ }
+
+
+ unsigned.range <- function(x) c(-abs(min(x, na.rm = TRUE)), abs(max(x, na.rm = TRUE)))
+
+ rangx1 <- unsigned.range(xx[, 1L])
+ rangx2 <- unsigned.range(xx[, 2L])
+ rangy1 <- unsigned.range(yy[, 1L])
+ rangy2 <- unsigned.range(yy[, 2L])
+
+ if (is.null(xlim) && is.null(ylim))
+ xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
+ else if (is.null(xlim))
+ xlim <- rangx1
+ else if (is.null(ylim))
+ ylim <- rangx2
+
+ ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
+ on.exit(par(op))
+ op <- par(pty = "s")
+
+ plot(xx, type = "n", xlim = xlim, ylim = ylim, col = cols[1],
+ xlab = xlab, ylab = ylab, ...)
+ text(xx, xlabs, cex = xcexs, col = xcols, ...)
+ par(new = TRUE)
+ plot(yy, axes = FALSE, type = "n", xlim = xlim * ratio, ylim = ylim *
+ ratio, xlab = "", ylab = "", col = xcols, ...)
+ axis(3, col = cols[2L], ...)
+ axis(4, col = cols[2L], ...)
+ box(col = cols[1L])
+ text(yy, labels = ylabs, col = cols[2L], ...)
+ arrows(0, 0, yy[, 1L] * 0.8, yy[, 2L] * 0.8, col = cols[2L],
+ length = arrow.len)
+ invisible()
+}
+
diff --git a/R/odbc.R b/R/odbc.R
new file mode 100644
index 0000000..05c2196
--- /dev/null
+++ b/R/odbc.R
@@ -0,0 +1,220 @@
+
+
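+## The methods below follow the same basic pattern: fetch just the
+## variables the formula mentions from the ODBC table into
+## design$variables, then hand off to the in-memory method (usually via
+## NextMethod()).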
+print.ODBCsvydesign<-function(x,...){
+ cat("ODBC-backed ")
+ NextMethod()
+ if (!checkConnection(x$db$connection, error=FALSE))
+ cat("<ODBC Connection closed>\n")
+ invisible(x)
+}
+
+summary.ODBCsvydesign<-function(object,...){
+ class(object)<-c("summary.ODBCsvydesign",class(object))
+ object
+}
+
+print.summary.ODBCsvydesign<-function(x,...){
+ print.survey.design2(x,varnames=TRUE,design.summaries=TRUE,...)
+ invisible(x)
+}
+
+close.ODBCsvydesign<-function(con,...){
+ RODBC::odbcClose(con$db$connection,...)
+ invisible(con)
+}
+
+open.ODBCsvydesign<-function(con,...){
+ oldenc<-attr(con$db$connection,"encoding") ## bug in RODBC 1.2-3
+ con$db$connection<-RODBC::odbcReConnect(con$db$connection,...)
+ attr(con$db$connection,"encoding")<-oldenc
+ con
+}
+
+svymean.ODBCsvydesign<-function(x, design,...){
+ design$variables<-getvars(x, design$db$connection, design$db$tablename, updates=design$updates)
+ NextMethod("svymean",design)
+}
+
+
+svytotal.ODBCsvydesign<-function(x, design,na.rm=FALSE,...){
+ design$variables<-getvars(x, design$db$connection, design$db$tablename, updates=design$updates)
+ NextMethod("svytotal",design)
+}
+
+svyquantile.ODBCsvydesign<-function(x, design,quantiles,...){
+ design$variables<-getvars(x, design$db$connection, design$db$tablename, updates=design$updates)
+ NextMethod("svyquantile",design)
+}
+
+svyglm.ODBCsvydesign<-function(formula, design,...){
+ design$variables<-dropFactor(getvars(formula, design$db$connection, design$db$tablename,updates=design$updates),
+ weights(design))
+ NextMethod("svyglm",design)
+}
+
+
+svyranktest.ODBCsvydesign<-function(formula, design,...){
+ design$variables<-dropFactor(getvars(formula, design$db$connection, design$db$tablename,updates=design$updates),
+ weights(design))
+ NextMethod("svyranktest",design)
+}
+
+
+svyplot.ODBCsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename, updates=design$updates)
+ design$variables[weights(design)==0,]<-NA
+ NextMethod("svyplot",design)
+}
+
+
+svycoplot.ODBCsvydesign<-function(formula,design, style=c("hexbin","transparent"),
+ basecol="black",alpha=c(0,0.8),hexscale=c("relative","absolute"),...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates)
+ design$variables[weights(design)==0,]<-NA
+ NextMethod("svycoplot",design)
+}
+
+svyboxplot.ODBCsvydesign<-function(formula,design, ...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates)
+ design$variables[weights(design)==0,]<-NA
+ class(design)<-setdiff(class(design),"ODBCsvydesign")
+ svyboxplot(formula,design,...)
+}
+
+svycdf.ODBCsvydesign<-function(formula,design, na.rm=TRUE, ...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates)
+ NextMethod("svycdf",design)
+
+}
+
+svyolr.ODBCsvydesign<-function(formula,design,...){
+ design$variables<-dropFactor(getvars(formula, design$db$connection, design$db$tablename,updates=design$updates),
+ weights(design))
+ NextMethod("svyolr",design)
+}
+
+svycoxph.ODBCsvydesign<-function(formula,design,...){
+ design$variables<-dropFactor(getvars(formula, design$db$connection, design$db$tablename,updates=design$updates),
+ weights(design))
+ NextMethod("svycoxph",design)
+}
+
+svyvar.ODBCsvydesign<-function(x,design,na.rm=FALSE,...){
+ design$variables<-getvars(x, design$db$connection, design$db$tablename,updates=design$updates)
+ NextMethod("svyvar",design)
+}
+
+
+
+svykm.ODBCsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates)
+ NextMethod("svykm",design)
+}
+
+
+svykappa.ODBCsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates)
+ NextMethod("svykappa",design)
+}
+
+
+svysmooth.ODBCsvydesign<-function(formula,design,method=c("locpoly","quantreg"),bandwidth,quantile,df,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates)
+ NextMethod("svysmooth",design)
+}
+
+
+svychisq.ODBCsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates)
+ NextMethod("svychisq",design)
+}
+
+svyratio.ODBCsvydesign<-function(numerator, denominator, design,...){
+ design$variables<-cbind(getvars(numerator,design$db$connection, design$db$tablename,updates=design$updates),
+ getvars(denominator,design$db$connection, design$db$tablename,updates=design$updates))
+ NextMethod("svyratio",design)
+
+}
+
+
+svyby.ODBCsvydesign<-function(formula, by, design,...){
+ design$variables<-cbind(getvars(formula,design$db$connection, design$db$tablename,updates=design$updates),
+ getvars(by,design$db$connection, design$db$tablename,updates=design$updates))
+ class(design)<-setdiff(class(design),"ODBCsvydesign")
+ svyby(formula,by,design,...)
+}
+
+svytable.ODBCsvydesign<-function(formula,design,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates)
+ NextMethod("svytable",design)
+}
+
+calibrate.ODBCsvydesign<-function(design,formula,...){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,updates=design$updates)
+ NextMethod("calibrate",design)
+}
+postStratify.ODBCsvydesign<-function(design, strata, population, partial = FALSE, ...) .NotYetImplemented()
+
+subset.ODBCsvydesign<-function (x, subset, ...)
+{
+ e <- substitute(subset)
+ x$variables<-getvars(make.formula(all.vars(e)), x$db$connection, x$db$tablename,updates=x$updates)
+ r <- eval(e, x$variables, parent.frame())
+ r <- r & !is.na(r)
+ x <- x[r, ]
+ x$call <- sys.call(-1)
+ x
+}
+
+
+dim.ODBCsvydesign<-function(x){
+ w<-weights(x)
+ nrow<-sum(w!=0)
+ coln<-names(RODBC::sqlQuery(x$db$connection, paste("select * from", x$db$tablename, "limit 1")))
+ if (!is.null(x$updates)){
+ update.names<-do.call(c, lapply(x$updates, names))
+ ncol<-length(unique(c(coln,update.names)))
+ } else ncol<-length(coln)
+ c(nrow,ncol)
+}
+
+
+dimnames.ODBCsvydesign<-function(x){
+ w<-weights(x)
+ rown<-rownames(x$cluster)[w!=0]
+ coln<-names(RODBC::sqlQuery(x$db$connection, paste("select * from", x$db$tablename, "limit 1")))
+ if (!is.null(x$updates)){
+ update.names<-do.call(c, lapply(x$updates, names))
+ coln<-unique(c(coln,update.names))
+ }
+ list(rown,coln)
+}
+
+
+"[.ODBCsvydesign"<-function (x, i, ..., drop = TRUE)
+{
+ if (!missing(i)) {
+ if (is.logical(i))
+ x$prob[!i] <- Inf
+ else if (is.numeric(i) && length(i))
+ x$prob[-i] <- Inf
+ else {
+ tmp <- x$prob[i, ]
+ x$prob <- rep(Inf, length(x$prob))
+ x$prob[i, ] <- tmp
+ }
+ index <- is.finite(x$prob)
+ psu <- !duplicated(x$cluster[index, 1])
+ tt <- table(x$strata[index, 1][psu])
+ if (any(tt == 1)) {
+ warning(sum(tt == 1), " strata have only one PSU in this subset.")
+ }
+
+ }
+ else {
+ if (!is.null(x$variables))
+ x$variables <- x$variables[, ..1, drop = FALSE]
+ }
+ x
+}
+
diff --git a/R/olr.R b/R/olr.R
new file mode 100644
index 0000000..c06f866
--- /dev/null
+++ b/R/olr.R
@@ -0,0 +1,338 @@
+svyolr<-function(formula, design,...) UseMethod("svyolr",design)
+
+##
+## Much of this is taken from MASS polr, thus the GPL-2 license.
+##
+
+
+svyolr.svyrep.design<-function(formula,design,...,return.replicates=FALSE,
+ multicore=getOption("survey.multicore")){
+ environment(formula)<-environment()
+ df<-model.frame(design)
+ pwt<-weights(design,"sampling")
+ if (multicore && !requireNamespace("parallel", quietly=TRUE))
+ multicore <- FALSE
+
+ rval<-suppressWarnings(MASS::polr(formula,data=df,...,Hess=TRUE,model=FALSE,
+ weights=pwt))
+ start<-c(rval$coefficients,rval$zeta)
+ rw<-weights(design,"analysis")
+ if (multicore){
+ betas<-do.call(cbind,parallel::mclapply(1:ncol(rw), function(i){
+ w<-rw[,i]
+ environment(formula)<-environment()
+ m<-MASS::polr(formula,data=df,Hess=FALSE, start=start, model=FALSE, weights=w)
+ c(m$coefficients, m$zeta)
+ }))
+ } else {
+ betas<-apply(rw,2,function(w) {
+ environment(formula)<-environment()
+ m<-MASS::polr(formula,data=df,Hess=FALSE, start=start, model=FALSE, weights=w)
+ c(m$coefficients, m$zeta)
+ })
+ }
+ rval$var<-svrVar(t(betas),design$scale,design$rscales,mse=design$mse, coef=start)
+ rval$df.residual<-degf(design)-length(rval$coefficients)
+ rval$deviance<-rval$deviance/mean(pwt)
+ class(rval)<-"svyolr"
+ rval$call<-sys.call()
+ rval$call[[1]]<-as.name(.Generic)
+ if (return.replicates) rval$replicates<-t(betas)
+ rval
+ }
+
+
+pgumbel<-
+function (q, loc = 0, scale = 1, lower.tail = TRUE)
+{
+ q <- (q - loc)/scale
+ p <- exp(-exp(-q))
+ if (!lower.tail)
+ 1 - p
+ else p
+}
+dgumbel<-function (x, loc = 0, scale = 1, log = FALSE)
+{
+ x <- (x - loc)/scale
+ d <- log(1/scale) - x - exp(-x)
+ if (!log)
+ exp(d)
+ else d
+}
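+
+## pgumbel/dgumbel supply the CDF and density paired with the "cloglog"
+## method, filling the role plogis/dlogis etc. play in the pfun/dfun
+## switch below.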
+
+
+svyolr.survey.design2<-function (formula, design, start, ..., na.action=na.omit,
+ method = c("logistic", "probit", "cloglog", "cauchit"))
+{
+ logit <- function(p) log(p/(1 - p))
+ fmin <- function(beta) {
+ theta <- beta[pc + 1:q]
+ gamm <- c(-100, cumsum(c(theta[1], exp(theta[-1]))),
+ 100)
+ eta <- offset
+ if (pc > 0)
+ eta <- eta + drop(x %*% beta[1:pc])
+ pr <- pfun(gamm[y + 1] - eta) - pfun(gamm[y] - eta)
+ if (all(pr > 0))
+ -sum(wt * log(pr))
+ else Inf
+ }
+ gmini <- function(beta) {
+ jacobian <- function(theta) {
+ k <- length(theta)
+ etheta <- exp(theta)
+ mat <- matrix(0, k, k)
+ mat[, 1] <- rep(1, k)
+ for (i in 2:k) mat[i:k, i] <- etheta[i]
+ mat
+ }
+ theta <- beta[pc + 1:q]
+ gamm <- c(-100, cumsum(c(theta[1], exp(theta[-1]))),
+ 100)
+ eta <- offset
+ if (pc > 0)
+ eta <- eta + drop(x %*% beta[1:pc])
+ pr <- pfun(gamm[y + 1] - eta) - pfun(gamm[y] - eta)
+ p1 <- dfun(gamm[y + 1] - eta)
+ p2 <- dfun(gamm[y] - eta)
+ g1 <- if (pc > 0)
+ x * (wt * (p1 - p2)/pr)
+ else numeric(0)
+ xx <- .polrY1 * p1 - .polrY2 * p2
+ g2 <- - xx * (wt/pr)
+ g2 <- g2 %*% jacobian(theta)
+ if (all(pr > 0))
+ cbind(g1, g2)
+ else NA+cbind(g1,g2)
+ }
+ gmin<-function(beta){
+ colSums(gmini(beta))
+ }
+ m <- match.call(expand.dots = FALSE)
+ method <- match.arg(method)
+
+ pfun <- switch(method, logistic = plogis, probit = pnorm,
+ cloglog = pgumbel, cauchit = pcauchy)
+ dfun <- switch(method, logistic = dlogis, probit = dnorm,
+ cloglog = dgumbel, cauchit = dcauchy)
+
+
+ m<-model.frame(formula,model.frame(design),na.action=na.pass)
+ Terms <- attr(m, "terms")
+ m<-na.action(m)
+ nao<-attr(m,"na.action")
+ if(length(nao)) {
+ design<-design[-nao,]
+ }
+
+ x <- model.matrix(Terms, m)
+ xint <- match("(Intercept)", colnames(x), nomatch = 0)
+ n <- nrow(x)
+ pc <- ncol(x)
+ cons <- attr(x, "contrasts")
+ if (xint > 0) {
+ x <- x[, -xint, drop = FALSE]
+ pc <- pc - 1
+ }
+ else warning("an intercept is needed and assumed")
+
+ wt <- weights(design)
+
+
+ offset <- model.offset(m)
+ if (length(offset) <= 1)
+ offset <- rep(0, n)
+ y <- model.response(m)
+ if (!is.factor(y))
+ stop("response must be a factor")
+ lev <- levels(y)
+ if (length(lev) <= 2)
+ stop("response must have 3 or more levels")
+ y <- unclass(y)
+ q <- length(lev) - 1
+ Y <- matrix(0, n, q)
+ .polrY1 <- col(Y) == y
+ .polrY2 <- col(Y) == y - 1
+ if (missing(start)) {
+ q1 <- length(lev)%/%2
+ y1 <- (y > q1)
+ X <- cbind(Intercept = rep(1, n), x)
+ fit <- switch(method, logistic = glm.fit(X, y1, wt/mean(wt), family = quasibinomial(),
+ offset = offset), probit = glm.fit(X, y1, wt/mean(wt), family = quasibinomial("probit"),
+ offset = offset), cloglog = glm.fit(X, y1, wt/mean(wt), family = quasibinomial("probit"),
+ offset = offset), cauchit = glm.fit(X, y1, wt/mean(wt), family = quasibinomial("cauchit"),
+ offset = offset))
+ if (!fit$converged)
+ stop("attempt to find suitable starting values failed")
+ coefs <- fit$coefficients
+ if (any(is.na(coefs))) {
+ warning("design appears to be rank-deficient, so dropping some coefs")
+ keep <- names(coefs)[!is.na(coefs)]
+ coefs <- coefs[keep]
+ x <- x[, keep[-1], drop = FALSE]
+ pc <- ncol(x)
+ }
+ spacing <- logit((1:q)/(q + 1))
+ if (method != "logit")
+ spacing <- spacing/1.7
+ gammas <- -coefs[1] + spacing - spacing[q1]
+ thetas <- c(gammas[1], log(diff(gammas)))
+ start <- c(coefs[-1], thetas)
+ }
+ else if (length(start) != pc + q)
+ stop("'start' is not of the correct length")
+ res <- optim(start, fmin, gmin, method = "BFGS", hessian = TRUE,
+ ...)
+ beta <- res$par[seq_len(pc)]
+ theta <- res$par[pc + 1:q]
+ zeta <- cumsum(c(theta[1], exp(theta[-1])))
+ deviance <- 2 * res$value/mean(wt)
+ niter <- c(f.evals = res$counts[1], g.evals = res$counts[2])
+ names(zeta) <- paste(lev[-length(lev)], lev[-1], sep = "|")
+ if (pc > 0) {
+ names(beta) <- colnames(x)
+ eta <- drop(x %*% beta)
+ }
+ else {
+ eta <- rep(0, n)
+ }
+ cumpr <- matrix(pfun(matrix(zeta, n, q, byrow = TRUE) - eta),
+ , q)
+ fitted <- t(apply(cumpr, 1, function(x) diff(c(0, x, 1))))
+ dimnames(fitted) <- list(row.names(m), lev)
+ fit <- list(coefficients = beta, zeta = zeta, deviance = deviance,
+ fitted.values = fitted, lev = lev, terms = Terms, df.residual = sum(wt) -
+ pc - q, edf = pc + q, n = sum(wt), nobs = sum(wt),
+ method = method, convergence = res$convergence,
+ niter = niter)
+
+ dn <- c(names(beta), names(zeta))
+ H <- res$hessian
+ dimnames(H) <- list(dn, dn)
+ fit$Hessian <- H
+
+ fit$call<-sys.call()
+ fit$call[[1]]<-as.name(.Generic)
+
+ inffun<- gmini(res$par)%*%solve(H)
+ fit$var<-svyrecvar(inffun, design$cluster,
+ design$strata, design$fpc,
+ postStrata = design$postStrata)
+ fit$df.residual<-degf(design)-length(beta)
+
+
+ fit$na.action <- attr(m, "na.action")
+ fit$contrasts <- cons
+ fit$xlevels <- .getXlevels(Terms, m)
+ class(fit) <- "svyolr"
+ fit
+}
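+
+## Commented usage sketch (the cut points are illustrative, data from data(api)):
+## dclus1 <- update(dclus1, mealcat=cut(meals, c(0,25,50,75,100)))
+## svyolr(mealcat ~ avg.ed + mobility + stype, design=dclus1)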
+
+
+vcov.svyolr<-function(object,...) object$var
+
+print.svyolr<-function (x, ...)
+{
+ if (!is.null(cl <- x$call)) {
+ cat("Call:\n")
+ dput(cl, control = NULL)
+ }
+ if (length(coef(x))) {
+ cat("\nCoefficients:\n")
+ print(coef(x,intercept=FALSE), ...)
+ }
+ else {
+ cat("\nNo coefficients\n")
+ }
+ cat("\nIntercepts:\n")
+ print(x$zeta, ...)
+ invisible(x)
+ }
+
+
+coef.svyolr<-function(object,intercepts=TRUE,...) {
+ if(intercepts)
+ c(object$coefficients, object$zeta)
+ else
+ object$coefficients
+ }
+
+summary.svyolr<-function (object, digits = max(3, .Options$digits - 3), correlation = FALSE,
+ ...)
+{
+ cc <- coef(object)
+ pc <- length(coef(object, FALSE))
+ q <- length(object$zeta)
+ coef <- matrix(0, pc + q, 3, dimnames = list(names(cc), c("Value",
+ "Std. Error", "t value")))
+ coef[, 1] <- cc
+ vc <- vcov(object)
+ z.ind <- (pc + 1):(pc + q)
+ gamma <- object$zeta
+ theta <- c(gamma[1], log(diff(gamma)))
+ jacobian <- function(theta) {
+ k <- length(theta)
+ etheta <- exp(theta)
+ mat <- matrix(0, k, k)
+ mat[, 1] <- rep(1, k)
+ for (i in 2:k) mat[i:k, i] <- etheta[i]
+ mat
+ }
+ J <- jacobian(theta)
+ vc[z.ind, z.ind] <- J %*% vc[z.ind, z.ind] %*% t(J)
+ coef[, 2] <- sd <- sqrt(diag(vc))
+ coef[, 3] <- coef[, 1]/coef[, 2]
+ object$coefficients <- coef
+ object$pc <- pc
+ object$digits <- digits
+ if (correlation)
+ object$correlation <- (vc/sd)/rep(sd, rep(pc + q, pc +
+ q))
+ class(object) <- "summary.svyolr"
+ object
+}
+
+print.summary.svyolr<-function (x, digits = x$digits, ...)
+{
+ if (!is.null(cl <- x$call)) {
+ cat("Call:\n")
+ dput(cl, control = NULL)
+ }
+ coef <- format(round(x$coefficients, digits = digits))
+ pc <- x$pc
+ if (pc > 0) {
+ cat("\nCoefficients:\n")
+ print(x$coefficients[seq_len(pc), , drop = FALSE], quote = FALSE,
+ ...)
+ }
+ else {
+ cat("\nNo coefficients\n")
+ }
+ cat("\nIntercepts:\n")
+ print(coef[(pc + 1):nrow(coef), , drop = FALSE], quote = FALSE,
+ ...)
+ if (nzchar(mess <- naprint(x$na.action)))
+ cat("(", mess, ")\n", sep = "")
+ if (!is.null(correl <- x$correlation)) {
+ cat("\nCorrelation of Coefficients:\n")
+ ll <- lower.tri(correl)
+ correl[ll] <- format(round(correl[ll], digits))
+ correl[!ll] <- ""
+ print(correl[-1, -ncol(correl)], quote = FALSE, ...)
+ }
+ invisible(x)
+}
+
+model.frame.svyolr<-function(formula, ...){
+ mcall <- match.call(svyolr, formula$call)
+ design<- eval(mcall$design)
+ formula<-eval(mcall$formula)
+ mf<-model.frame(formula,model.frame(design))
+ w<-weights(design, type="sampling")
+ if (is.null(naa<-attr(mf,"na.action")))
+ mf[["(weights)"]]<-w
+ else
+ mf[["(weights)"]]<-w[-naa]
+ mf
+ }
+
diff --git a/R/pFsum.R b/R/pFsum.R
new file mode 100644
index 0000000..75bc344
--- /dev/null
+++ b/R/pFsum.R
@@ -0,0 +1,48 @@
+pFsum<-function(x,df,a,ddf=Inf,lower.tail=TRUE,method=c("saddlepoint","integration","satterthwaite"),...){
+ if (ddf==Inf) return(pchisqsum(x,df=df,a=a,lower.tail=lower.tail,...))
+
+ method<-match.arg(method)
+ if (method=="integration" && !(requireNamespace("CompQuadForm",quietly=TRUE))){
+ warning("Package 'CompQuadForm' not found, using saddlepoint approximation")
+ method<-"saddlepoint"
+ }
+
+
+ if (method=="integration"){
+
+ int<-CompQuadForm::davies(0,lambda=c(a,-x/ddf), h=c(df,ddf),acc=1e-7)
+ if ( (int$ifault %in% c(0,2))){
+ rval<-int$Qq
+ } else {
+ rval<-CompQuadForm::davies(0,lambda=c(a,-x/ddf), h=c(df,ddf),acc=1e-5)$Qq
+ }
+ if(lower.tail)
+ return(1-rval)
+ else
+ return(rval)
+ } else if (method %in% c("satterthwaite","saddlepoint")){
+ if(any(df>1)){
+ a<-rep(a,df)
+ }
+ tr<-mean(a)
+ tr2<-mean(a^2)/(tr^2)
+ scale=tr*tr2
+ ndf=length(a)/tr2
+ rval<-pf(x/ndf/scale, ndf,ddf,lower.tail=lower.tail)
+
+ if (method=="saddlepoint"){
+ a<-c(a,-x/ddf)
+ df<-c(df,ddf)
+ if(any(df>1))
+ a<-rep(a,df)
+ s<-saddle(0,a)
+ if (!is.na(s)) {
+ if (lower.tail)
+ rval<-1-s
+ else
+ rval<-s
+ }
+ }
+ rval
+ }
+}
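+## The Satterthwaite step above matches the first two moments of
+## sum(a_i X_i) with X_i ~ chi^2_1: scale = mean(a^2)/mean(a) and
+## df = length(a)*mean(a)^2/mean(a^2).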
diff --git a/R/paley.R b/R/paley.R
new file mode 100644
index 0000000..25f5e3e
--- /dev/null
+++ b/R/paley.R
@@ -0,0 +1,118 @@
+## Paley construction of Hadamard matrices
+## Only implemented for GF(p), because it's
+## not entirely straightforward to find
+## representations of GF(p^m)
+
+paley<-function(n, nmax=2*n, prime=NULL, check=!is.null(prime)){
+
+ if(!is.null(prime) && missing(n)) n<-prime
+
+ ## these are primes with p+1 a multiple of 4
+ small.primes<-c(3, 7, 11, 19, 23, 31, 43, 47, 59, 67, 71, 79, 83, 103, 107,
+ 127, 131, 139, 151, 163, 167, 179, 191, 199, 211, 223, 227, 239,
+ 251, 263, 271, 283, 307, 311, 331, 347, 359, 367, 379, 383, 419,
+ 431, 439, 443, 463, 467, 479, 487, 491, 499, 503, 523, 547, 563,
+ 571, 587, 599, 607, 619, 631, 643, 647, 659, 683, 691, 719, 727,
+ 739, 743, 751, 787, 811, 823, 827, 839, 859, 863, 883, 887, 907,
+ 911, 919, 947, 967, 971, 983, 991, 1019, 1031, 1039, 1051, 1063,
+ 1087, 1091, 1103, 1123, 1151, 1163, 1171, 1187, 1223, 1231, 1259,
+ 1279, 1283, 1291, 1303, 1307, 1319, 1327, 1367, 1399, 1423, 1427,
+ 1439, 1447, 1451, 1459, 1471, 1483, 1487, 1499, 1511, 1523, 1531,
+ 1543, 1559, 1567, 1571, 1579, 1583, 1607, 1619, 1627, 1663, 1667,
+ 1699, 1723, 1747, 1759, 1783, 1787, 1811, 1823, 1831, 1847, 1867,
+ 1871, 1879, 1907, 1931, 1951, 1979, 1987, 1999, 2003, 2011, 2027,
+ 2039, 2063, 2083, 2087, 2099, 2111, 2131, 2143, 2179, 2203, 2207,
+ 2239, 2243, 2251, 2267, 2287, 2311, 2339, 2347, 2351, 2371, 2383,
+ 2399, 2411, 2423, 2447, 2459, 2467, 2503, 2531, 2539, 2543, 2551,
+ 2579, 2591, 2647, 2659, 2663, 2671, 2683, 2687, 2699, 2707, 2711,
+ 2719, 2731, 2767, 2791, 2803, 2819, 2843, 2851, 2879, 2887, 2903,
+ 2927, 2939, 2963, 2971, 2999, 3011, 3019, 3023, 3067, 3079, 3083,
+ 3119, 3163, 3167, 3187, 3191, 3203, 3251, 3259, 3271, 3299, 3307,
+ 3319, 3323, 3331, 3343, 3347, 3359, 3371, 3391, 3407, 3463, 3467,
+ 3491, 3499, 3511, 3527, 3539, 3547, 3559, 3571, 3583, 3607, 3623,
+ 3631, 3643, 3659, 3671, 3691, 3719, 3727, 3739, 3767, 3779, 3803,
+ 3823, 3847, 3851, 3863, 3907, 3911, 3919, 3923, 3931, 3943, 3947,
+ 3967, 4003, 4007, 4019, 4027, 4051, 4079, 4091, 4099, 4111, 4127,
+ 4139, 4159, 4211, 4219, 4231, 4243, 4259, 4271, 4283, 4327, 4339,
+ 4363, 4391, 4423, 4447, 4451, 4463, 4483, 4507, 4519, 4523, 4547,
+ 4567, 4583, 4591, 4603, 4639, 4643, 4651, 4663, 4679, 4691, 4703,
+ 4723, 4751, 4759, 4783, 4787, 4799, 4831, 4871, 4903, 4919, 4931,
+ 4943, 4951, 4967, 4987, 4999, 5003, 5011, 5023, 5039, 5051, 5059,
+ 5087, 5099, 5107, 5119, 5147, 5167, 5171, 5179, 5227, 5231, 5279,
+ 5303, 5323, 5347, 5351, 5387, 5399, 5407, 5419, 5431, 5443, 5471,
+ 5479, 5483, 5503, 5507, 5519, 5527, 5531, 5563, 5591, 5623, 5639,
+ 5647, 5651, 5659, 5683, 5711, 5743, 5779, 5783, 5791, 5807, 5827,
+ 5839, 5843, 5851, 5867, 5879, 5903, 5923, 5927, 5939, 5987, 6007,
+ 6011, 6043, 6047, 6067, 6079, 6091, 6131, 6143, 6151, 6163, 6199,
+ 6203, 6211, 6247, 6263, 6271, 6287, 6299, 6311, 6323, 6343, 6359,
+ 6367, 6379, 6427, 6451, 6491, 6547, 6551, 6563, 6571, 6599, 6607,
+ 6619, 6659, 6679, 6691, 6703, 6719, 6763, 6779, 6791, 6803, 6823,
+ 6827, 6863, 6871, 6883, 6899, 6907, 6911, 6947, 6959, 6967, 6971,
+ 6983, 6991, 7019, 7027, 7039, 7043, 7079, 7103, 7127, 7151, 7159,
+ 7187, 7207, 7211, 7219, 7243, 7247, 7283, 7307, 7331, 7351, 7411,
+ 7451, 7459, 7487, 7499, 7507, 7523, 7547, 7559, 7583, 7591, 7603,
+ 7607, 7639, 7643, 7687, 7691, 7699, 7703, 7723, 7727, 7759, 7823,
+ 7867, 7879, 7883, 7907, 7919)
+
+
+ if (is.null(prime)){
+ nceil<-nn <- n + 4 - (n %% 4)
+ if ( (n %% 4) +4 == (n %% 8)) {
+ while (!(nn %% 8)){ nn <- nn /2}
+ if ((nn-1) %in% small.primes){
+ m<-paley(prime=nn-1,check=check)
+ while(nn<nceil){
+ m<-rbind(cbind(m,m),cbind(m,1-m))
+ nn<-nn*2
+ }
+ return(m)
+ }
+ }
+
+ if (n>max(small.primes)) return(NULL)
+ p<-min(small.primes[small.primes>=n])
+ if ((p+1 > nceil+4) && (nceil+4 < nmax)) return(paley(nceil+3))
+ if (p>nmax) return(NULL)
+ } else{
+ p<-prime
+ if ((p+1) %% 4 !=0) {
+ warning("'prime'+1 is not divisible by 4")
+ return(NULL)
+ }
+ if (p<n) {
+ warning("'prime' is too small")
+ return(NULL)
+ }
+ }
+
+ m<-outer(0:(p-1) ,0:(p-1),"+") %% p
+
+ res<-integer(1+floor((p-1)/2))
+ res[1]<-0
+ res[2]<-1
+ for(i in 2:floor((p-1)/2))
+ res[i+1]<- (i*i) %% p
+
+ m[m %in% res]<-0
+ m[m>0]<-1
+
+ rval<-cbind(1,rbind(1,m))
+ if(check) {
+ if(!is.hadamard(rval))
+ warning("matrix is not Hadamard: is 'prime' really prime?")
+ }
+ rval
+}
+
+
+is.hadamard<-function(H, style=c("0/1","+-"), full.orthogonal.balance=TRUE){
+ if (is.matrix(H) && is.numeric(H) && (ncol(H)==nrow(H))){
+ H<-switch(match.arg(style),
+ "0/1"= 2*H-1,
+ "+-"=H)
+ isTRUE(all.equal(crossprod(H), diag(ncol(H))*ncol(H))) &&
+ isTRUE(all.equal(max(abs(H)),1)) &&
+ (!full.orthogonal.balance || sum(H[-1,])==0)
+
+ } else FALSE
+}
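+
+## Commented sketch: paley(11) uses GF(11) to build a 12x12 Hadamard
+## matrix in 0/1 coding, and is.hadamard(paley(11)) should return TRUE.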
diff --git a/R/pps.R b/R/pps.R
new file mode 100644
index 0000000..f5d36b5
--- /dev/null
+++ b/R/pps.R
@@ -0,0 +1,485 @@
+##
+## Constructing cov(R_i,R_j)/pi^*_ij, or \check{\check{\Delta}}_ij in Särndal's notation
+## We use this form because it can be sparse and because it is easy to combine
+## multistage and multiphase sampling.
+##
+## The routines to compute the variances are in ht.R
+##
+
+pi2Dcheck<-function(pmat,tolerance=min(pmat)/1e4){
+ rval<-(pmat-outer(diag(pmat),diag(pmat)))/pmat
+ rval[abs(rval)<tolerance]<-0
+ as(rval,"sparseMatrix")
+}
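+## i.e. entrywise Dcheck_ij = (pi_ij - pi_i*pi_j)/pi_ij, with entries
+## below the tolerance truncated to zero so the result stores sparsely.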
+
+
+## Overton's approximation for PPS
+overton2Dcheck<-function(prob,strat=rep(1,length(prob))){
+ fbar<-outer(prob,prob,"+")/2
+ n<-ave(strat,strat,FUN=length)
+ rval<- 1- (n-fbar)/(n-1)
+ rval[!outer(strat,strat,"==") | fbar==1]<-0
+ diag(rval)<-(1-diag(fbar))
+ as(rval,"sparseMatrix")
+}
+
+multi.overton2Dcheck<-function(id,strata,prob){
+ nstage<-ncol(id)
+ rval<-vector("list",nstage)
+ for(stage in 1:nstage){
+ uid<-!duplicated(id[,stage])
+ rval[[stage]]<-list(id=id[,stage],
+ dcheck=overton2Dcheck(prob[uid,stage],
+ strata[uid,stage])
+ )
+ }
+ rval
+}
+
+## truncated Hartley-Rao approximation
+HRDcheck<-function(prob,strat=rep(1,length(prob)),p2bar){
+ fbar<-outer(prob,prob,"+")
+ n<-ave(strat,strat,FUN=length)
+ rval<- 1- (n-fbar+p2bar)/(n-1)
+ rval[!outer(strat,strat,"==") | fbar==1]<-0
+ diag(rval)<-(1-prob)
+ as(rval,"sparseMatrix")
+}
+
+multi.HRDcheck<-function(id,strata,prob,p2bar){
+ nstage<-ncol(id)
+ rval<-vector("list",nstage)
+ for(stage in 1:nstage){
+ uid<-!duplicated(id[,stage])
+ rval[[stage]]<-list(id=id[,stage],
+ dcheck=HRDcheck(prob[uid,stage],
+ strata[uid,stage],p2bar[[stage]][strata[uid,stage]])
+ )
+ }
+ rval
+}
+
+## truncated Hartley-Rao approximation, using sample estimate mean(p) for sum(p^2/n)
+
+multi.HR1Dcheck<-function(id,strata,prob){
+ nstage<-ncol(id)
+ rval<-vector("list",nstage)
+ for(stage in 1:nstage){
+ uid<-!duplicated(id[,stage])
+ rval[[stage]]<-list(id=id[,stage],
+ dcheck=HRDcheck(prob[uid,stage],
+ strata[uid,stage],
+ ave(prob[uid,stage], strata[uid,stage])
+ )
+ )
+ }
+ rval
+ }
+
+##not used yet
+combine_stages<-function(Dcheck1,Dcheck2){
+ as(-Dcheck1*Dcheck2+Dcheck1+Dcheck2,"sparseMatrix")
+}
+
+make_pps_covmat<-function(design,method){##FIXME
+ if (method=="overton")
+ multi.overton2Dcheck(design$cluster, design$strata, design$allprob)
+ else stop("method",method,"not recognized")
+
+}
+
+image.pps<-function(x,...){
+ Matrix::image(x$dcheck[[1]]$dcheck,...)
+}
+
+##
+pps_design<-function(method, ids,strata=NULL, probs=NULL, fpc=NULL,
+ subset, data,call=sys.call(),variance="HT",...){
+ UseMethod("pps_design")
+}
+pps_design.character<-function(method,ids,strata=NULL, probs=NULL, fpc=NULL, variables=variables,
+ subset, data,call=sys.call(),variance="HT",...){
+
+ if (length(ids[[2]])>1 && method!="brewer") stop("Multistage PPS sampling not supported with this method")
+ rval<-svydesign(ids=ids,strata=strata,weights=NULL,
+ probs=probs,fpc=fpc,data=data,pps="other")
+
+ deltacheck<-make_pps_covmat(rval, method)
+
+ rval$dcheck<-deltacheck
+ rval$variance<-variance
+
+ rval$call<-call
+ class(rval) <- c("pps","survey.design")
+ rval
+}
+
+ppsmat<-function(jointprob, tolerance=0.0001){
+ if ((!is.matrix(jointprob)) || !(NROW(jointprob)==NCOL(jointprob)))
+ stop("jointprob must be a square matrix")
+ rval<-list(pij=jointprob, tolerance=tolerance,call=sys.call())
+ class(rval)<-"ppsmat"
+ rval
+}
+
+print.ppsmat<-function(x,...) {
+ cat("PPS: Joint probability matrix: ")
+ print(x$call)
+ invisible(x)
+}
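+
+## Commented sketch (object names as on this package's data(election) help page):
+## dpps <- svydesign(id=~1, fpc=~p, data=election_pps,
+##                   pps=ppsmat(election_jointprob), variance="YG")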
+
+pps_design.ppsmat<-function(method,ids,strata=NULL, probs=NULL, fpc=NULL,variables=variables,
+ subset, data,call=sys.call(),variance="HT",...){
+
+ if (length(ids[[2]])>1) stop("Multistage PPS sampling not supported")
+ rval<-svydesign(ids=ids,strata=strata,weights=NULL,variables=variables,
+ probs=probs,fpc=fpc,data=data,pps="other")
+
+ deltacheck<-pi2Dcheck(method$pij,method$tolerance)
+ rval$variance<-variance
+
+ rval$dcheck<-list(list(id=1:nrow(method$pij), dcheck=deltacheck))
+
+ rval$call<-call
+ class(rval) <- c("pps","survey.design")
+ rval
+}
+
+HR<-function(psum=NULL, strata=NULL){
+ if (is.null(psum)) { ## estimate
+ rval<-list(pbar=NULL,call=sys.call())
+ } else if (is.data.frame(strata) || is.matrix(strata)){ #redundant
+ pbar<-lapply(1:NCOL(strata), function(i){
+ psum[!duplicated(strata[,i]),i]})
+ strata<-lapply(1:NCOL(strata), function(i){
+ strata[!duplicated(strata[,i]),i]})
+ rval<-list(pbar=pbar, strata=strata,call=sys.call())
+ } else if (is.null(strata) && is.numeric(psum) && length(psum)==1){
+ ## single number
+ rval<-list(pbar=list(psum),strata=list(1), call=sys.call())
+ } else{ ## non-redundant list
+ rval<-list(pbar=psum, strata=strata,call=sys.call())
+ }
+ class(rval)<-"HR"
+ rval
+}
+
+print.HR<-function(x,...) {
+ cat("PPS: Hartley-Rao correction: ")
+ print(x$call)
+ invisible(x)
+}
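+
+## Commented sketch: HR(x) supplies the Hartley-Rao term (the stratum sum
+## of squared sampling probabilities over the stratum sample size), while
+## HR() with no argument estimates it from the sample, e.g.
+## svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(), variance="YG")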
+pps_design.HR<-function(method,ids,strata=NULL, probs=NULL, fpc=NULL,
+ subset, data,call=sys.call(),variables=variables,variance="HT",...){
+
+ if (length(ids[[2]])>1) stop("Multistage PPS sampling not supported with this method")
+ rval<-svydesign(ids=ids,strata=strata,weights=NULL,
+ probs=probs,fpc=fpc,data=data,pps="other")
+
+ if (is.null(method$pbar)) ## sample estimate of sum(p^2/n)
+ deltacheck<-multi.HR1Dcheck(rval$cluster,rval$strata,rval$allprob)
+ else
+ deltacheck<-multi.HRDcheck(rval$cluster,rval$strata,rval$allprob, method$pbar)
+
+ rval$dcheck<-deltacheck
+ rval$variance<-variance
+
+ rval$call<-call
+ class(rval) <- c("pps","survey.design")
+ rval
+}
+print.pps<-function(x,...){
+ cat("Sparse-matrix design object:\n ")
+ print(x$call)
+}
+
+summary.pps<-function(object,...){
+ class(object)<-"summary.pps"
+ object
+}
+
+print.summary.pps<-function(x,...,varnames=TRUE){
+ cat("Two-phase sparse-matrix design:\n ")
+ print(x$call)
+ cat("Sampling probabilities:\n")
+ print(summary(x$prob))
+ if (varnames){
+ cat("Data variables:\n")
+ print(names(x$variables))
+ }
+ invisible(x)
+}
+
+
+
+
+ppsvar<-function(x,design){
+ postStrata<-design$postStrata
+ est<-design$variance ##Yates-Grundy or Horvitz-Thompson
+ if (!is.null(postStrata)){
+ for (psvar in postStrata){
+ if (inherits(psvar, "greg_calibration")) {
+ if (psvar$stage==0){
+ ## G-calibration at population level
+ x<-qr.resid(psvar$qr,x/psvar$w)*psvar$w
+ } else {
+ ## G-calibration within clusters
+ stop("calibration within clusters not yet available for PPS designs")
+ }
+ } else {
+ ## ordinary post-stratification
+ psw<-attr(psvar, "weights")
+ psvar<-as.factor(psvar)
+ psmeans<-rowsum(x/psw,psvar,reorder=TRUE)/as.vector(table(psvar))
+ x<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw
+ }
+ }
+ }
+ dcheck<-design$dcheck
+ if (length(dcheck)!=1) stop("Multistage not implemented yet")
+ rval<-switch(est,HT=htvar.matrix(rowsum(x,dcheck[[1]]$id,reorder=FALSE),dcheck[[1]]$dcheck),
+ YG=ygvar.matrix(rowsum(x,dcheck[[1]]$id,reorder=FALSE),dcheck[[1]]$dcheck),
+ stop("can't happen"))
+ rval
+}
+
+svytotal.pps<-function(x,design, na.rm=FALSE, deff=FALSE,...){
+
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,model.frame(design), na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ } else {
+ if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+ else {
+ if(is.data.frame(x) && any(sapply(x,is.factor))){
+ xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
+ cols<-sapply(xx,NCOL)
+ scols<-c(0,cumsum(cols))
+ cn<-character(sum(cols))
+ for(i in 1:length(xx))
+ cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-cn
+ }
+ }
+ }
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ if(length(nas)>length(design$prob))
+ x<-x[nas==0,,drop=FALSE]
+ else
+ x[nas>0,]<-0
+ }
+
+ N<-sum(1/design$prob)
+ total <- colSums(x/as.vector(design$prob),na.rm=na.rm)
+ class(total)<-"svystat"
+ attr(total, "var")<-v<-ppsvar(x/design$prob,design)
+ attr(total,"statistic")<-"total"
+
+ if (is.character(deff) || deff){
+ nobs<-NROW(design$cluster)
+ if (deff=="replace")
+ vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2*(N-nobs)/N
+ else
+ vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2
+ attr(total, "deff")<-v/vsrs
+ }
+
+
+ return(total)
+ }
+
+svymean.pps<-function(x,design, na.rm=FALSE,deff=FALSE,...){
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,model.frame(design) ,na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ }
+ else {
+ if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+ else {
+ if(is.data.frame(x) && any(sapply(x,is.factor))){
+ xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
+ cols<-sapply(xx,NCOL)
+ scols<-c(0,cumsum(cols))
+ cn<-character(sum(cols))
+ for(i in 1:length(xx))
+ cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-cn
+ }
+ }
+ }
+
+
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ if (any(nas>0))
+ design<-design[nas==0,]
+ x[nas>0,]<-0
+ }
+
+ pweights<-1/design$prob
+ psum<-sum(pweights)
+ average<-colSums(x*pweights/psum)
+ x<-sweep(x,2,average)
+ v<-ppsvar(x*pweights/psum,design)
+ attr(average,"var")<-v
+ attr(average,"statistic")<-"mean"
+ class(average)<-"svystat"
+ if (is.character(deff) || deff){
+ nobs<-nrow(design)
+ if(deff=="replace"){
+ vsrs<-svyvar(x,design,na.rm=na.rm)/(nobs)
+ } else {
+ if(psum<nobs) {
+ vsrs<-NA*v
+ warning("Sample size greater than population size: are weights correctly scaled?")
+ } else{
+ vsrs<-svyvar(x,design,na.rm=na.rm)*(psum-nobs)/(psum*nobs)
+ }
+ }
+ attr(average, "deff")<-v/vsrs
+ }
+
+ return(average)
+}
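+
+## Usage sketch (illustrative, guarded by if(FALSE)): means on a pps design,
+## with deff="replace" comparing to with-replacement sampling. Assumes the
+## 'election' data shipped with this package.
+if (FALSE) {
+ data(election)
+ dpps <- svydesign(id=~1, fpc=~p, data=election_pps,
+ pps=ppsmat(election_jointprob), variance="YG")
+ svymean(~Bush+Kerry, dpps, deff="replace")
+}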
+
+
+
+svyratio.pps<-function(numerator=formula, denominator, design, separate=FALSE,na.rm=FALSE,formula,...){
+
+ if (separate){
+ strats<-sort(unique(design$strata[,1]))
+ rval<-list(ratios=lapply(strats,
+ function(s) {
+ tmp<-svyratio(numerator, denominator,
+ subset(design, design$strata[,1] %in% s),
+ separate=FALSE,...)
+ attr(tmp,"call")<-bquote(Stratum==.(s))
+ tmp}))
+ names(rval$ratios)<-strats
+
+ class(rval)<-c("svyratio_separate")
+ rval$call<-sys.call()
+ rval$strata<-strats
+ return(rval)
+ }
+
+ if (inherits(numerator,"formula"))
+ numerator<-model.frame(numerator,model.frame(design),na.action=na.pass)
+ else if(typeof(numerator) %in% c("expression","symbol"))
+ numerator<-eval(numerator, design$variables)
+ if (inherits(denominator,"formula"))
+ denominator<-model.frame(denominator,model.frame(design),na.action=na.pass)
+ else if(typeof(denominator) %in% c("expression","symbol"))
+ denominator<-eval(denominator, model.frame(design))
+
+ nn<-NCOL(numerator)
+ nd<-NCOL(denominator)
+
+ all<-cbind(numerator,denominator)
+ nas<-!complete.cases(all)
+ if (na.rm){
+ design<-design[!nas,]
+ all<-all[!nas,,drop=FALSE]
+ numerator<-numerator[!nas,,drop=FALSE]
+ denominator<-denominator[!nas,,drop=FALSE]
+ }
+ allstats<-svytotal(all, design)
+ rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/"))
+
+
+ vars<-matrix(ncol=nd,nrow=nn)
+ for(i in 1:nn){
+ for(j in 1:nd){
+ r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob)
+ vars[i,j]<-ppsvar(r*1/design$prob, design)
+ }
+ }
+ colnames(vars)<-names(denominator)
+ rownames(vars)<-names(numerator)
+ rval$var<-vars
+ attr(rval,"call")<-sys.call()
+ class(rval)<-"svyratio"
+ rval
+
+ }
+
+
+"[.pps"<-function (x,i, ..., drop=TRUE){
+ if (!missing(i)){
+ ## Set weights to zero: don't try to save memory
+ ## There should be an easier way to complement a subscript..
+ if (is.logical(i) && any(!i)){
+ ## logical indexing: use !
+ x$prob[!i]<-Inf
+ x$dcheck<-lapply(x$dcheck, function(m) {m$dcheck[!i,!i]<-0; m})
+ } else if (is.numeric(i) && length(i)){
+ ## numeric indexing: use -
+ x$prob[-i]<-Inf
+ x$dcheck<-lapply(x$dcheck, function(m) {m$dcheck[-i,-i]<-0;m})
+ } else if (is.character(i)) {
+ ##character indexing: use brute force and ignorance
+ ## prob is a plain vector: index without a comma, keeping names
+ ## so the character reassignment below still matches
+ tmp<-x$prob[i]
+ newprob<-rep(Inf, length(x$prob))
+ names(newprob)<-names(x$prob)
+ newprob[i]<-tmp
+ x$prob<-newprob
+ x$dcheck<-lapply(x$dcheck, function(m) {n<-Matrix(0,ncol(m$dcheck),ncol(m$dcheck)); n[i,i]<-m$dcheck[i,i]; m$dcheck<-n;m})
+ }
+ index<-is.finite(x$prob)
+ psu<-!duplicated(x$cluster[index,1])
+ tt<-table(x$strata[index,1][psu])
+ if(any(tt==1)){
+ warning(sum(tt==1)," strata have only one PSU in this subset.")
+ }
+ }
+ x
+}
+
+degf.pps<-function(design,...) {
+ inset <- weights(design, "sampling") != 0
+ length(unique(design$cluster[inset, 1])) - length(unique(design$strata[inset,1]))
+}
+
+
+
+
+
+postStratify.pps<-function(design, ...) {
+ stop("postStratify not yet implemented for these pps designs. Use calibrate()")
+}
diff --git a/R/regtest.R b/R/regtest.R
new file mode 100755
index 0000000..e519819
--- /dev/null
+++ b/R/regtest.R
@@ -0,0 +1,291 @@
+##deviance methods not exported, used by method="LRT"
+deviance.svycoxph<-function(object,...) 2 * (object$ll[1] - object$ll[2])
+deviance.coxph<-function(object,...) 2 * (object$loglik[1] - object$loglik[2])
+
+regTermTest<-function(model, test.terms, null=NULL, df=NULL, method=c("Wald","LRT"), lrt.approximation="saddlepoint"){
+
+ method<-match.arg(method)
+
+ canonicalOrder<-function(term){
+ tt<-strsplit(term,":")
+ tt<-lapply(tt,sort)
+ sapply(tt,paste,collapse=":")
+ }
+
+
+ if(inherits(test.terms,"formula"))
+ test.terms<-attr(terms(test.terms),"term.labels")
+
+ okbeta<-!is.na(coef(model,na.rm=FALSE)) ## na.rm for svyglm
+ tt<-attr(terms(model),"term.labels")
+ aa<-attr(model.matrix(model),"assign")[okbeta]
+ if((inherits(model,"coxph")|| inherits(model,"svyloglin") || inherits(model,"svyolr")) && attr(terms(model),"intercept"))
+ aa<-aa[-1]
+ index<-which(aa %in% match(canonicalOrder(test.terms),canonicalOrder(tt)))
+ if (length(index)==0)
+ stop("Terms didn't match: ",paste(canonicalOrder(test.terms),collapse=", ")," vs ",paste(canonicalOrder(tt),collapse=", "))
+
+ beta<-coef(model)[index]
+
+ if (!is.null(null))
+ beta<-beta-null
+ V<-vcov(model)[index,index]
+
+ ## this should be rewritten as methods, but that's not happening any time soon.
+ if (is.null(df)){
+ if (inherits(model,"svyglm"))
+ df<-model$df.residual
+ else if (inherits(model, "svycoxph"))
+ df<-model$degf.resid
+ else if (inherits(model,"lm"))
+ df<-model$df.residual
+ else if (inherits(model,"coxph"))
+ df<-model$n-length(coef(model))
+ else if (inherits(model, "MIresult"))
+ df<-min(model$df[index])
+ else if (inherits(model,"svyloglin"))
+ df<-model$df+1-length(index)
+ else if (inherits(model, "svyolr"))
+ df<-model$df.residual
+ else
+ df<-length(resid(model))-length(coef(model))
+ }
+
+ if (method=="LRT"){
+ if (inherits(model,"svyglm"))
+ V0<-model$naive.cov
+ else if (inherits(model, "svycoxph"))
+ V0<-model$inv.info
+ else if (inherits(model,"lm"))
+ V0<-vcov(model)
+ else if (inherits(model,"coxph")){
+ if (is.null(model$naive.var))
+ V0<-model$var
+ else
+ V0<-model$naive.var
+ } else if (inherits(model,"svyolr")) {
+ V0<-solve(model$Hess)
+ } else stop("method='LRT' not supported for this model")
+ V0<-V0[index,index]
+ test.formula<-make.formula(test.terms)[[2]]
+ if (!("formula") %in% names(model$call))
+ names(model$call)[[2]]<-"formula"
+
+ model0<-eval(bquote(update(model, .~.-(.(test.formula)))))
+ chisq<-deviance(model0)-deviance(model)
+ misspec<-eigen(solve(V0)%*%V, only.values=TRUE)$values
+ if (df==Inf)
+ p<-pchisqsum(chisq,rep(1,length(misspec)),misspec,method=lrt.approximation,lower.tail=FALSE)
+ else
+ p<-pFsum(chisq,rep(1,length(misspec)),misspec,ddf=df,method=lrt.approximation,lower.tail=FALSE)
+
+ rval<-list(call=sys.call(),mcall=model$call,chisq=chisq,
+ df=length(index),test.terms=test.terms,
+ p=p,lambda=misspec,ddf=df)
+ class(rval)<-"regTermTestLRT"
+ return(rval)
+ }
+
+
+ chisq<-beta%*%solve(V)%*%beta
+ if (df<Inf){
+ Ftest<-chisq/length(index)
+ rval<-list(call=sys.call(),mcall=model$call, Ftest=Ftest,
+ df=length(index),ddf=df,test.terms=test.terms,
+ p=pf(Ftest,length(index),df,lower.tail=FALSE))
+ } else {
+ rval<-list(call=sys.call(),mcall=model$call,chisq=chisq,
+ df=length(index),test.terms=test.terms,
+ p=pchisq(chisq,length(index),lower.tail=FALSE))
+ }
+ class(rval)<-"regTermTest"
+ rval
+}
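+
+## Usage sketch (illustrative, guarded by if(FALSE)): Wald and working
+## (Rao-Scott) likelihood-ratio tests for model terms. Assumes the 'api'
+## data shipped with this package.
+if (FALSE) {
+ data(api)
+ dclus2 <- svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
+ model <- svyglm(api00~ell+meals+mobility, design=dclus2)
+ regTermTest(model, ~ell+meals) ## Wald F test
+ regTermTest(model, ~ell+meals, method="LRT") ## working LRT
+}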
+
+
+print.regTermTest<-function(x,...){
+ cat("Wald test for ")
+ cat(x$test.terms)
+ cat("\n in ")
+ print(x$mcall)
+ if(is.null(x$Ftest))
+ cat("Chisq = ",x$chisq," on ",x$df," df: p=",format.pval(x$p),"\n")
+ else
+ cat("F = ",x$Ftest," on ",x$df," and ",x$ddf," df: p=",format.pval(x$p),"\n")
+ invisible(x)
+}
+
+print.regTermTestLRT<-function(x,...){
+ if (is.null(x$ddf) || x$ddf==Inf)
+ cat("Working (Rao-Scott) LRT for ")
+ else
+ cat("Working (Rao-Scott+F) LRT for ")
+ cat(x$test.terms)
+ cat("\n in ")
+ print(x$mcall)
+ chisq<-x$chisq/mean(x$lambda)
+ cat("Working 2logLR = ",chisq, 'p=',format.pval(x$p),"\n")
+ if (length(x$lambda)>1)
+ cat("(scale factors: ",signif(x$lambda/mean(x$lambda),2),")")
+ else cat("df=1")
+ if (!is.null(x$ddf) && is.finite(x$ddf))
+ cat("; denominator df=",x$ddf)
+ cat("\n")
+ invisible(x)
+}
+
+svycontrast<-function(stat, contrasts,...) UseMethod("svycontrast")
+
+match.names <- function(nms,contrasts){
+ l<-length(nms)
+ ll<-sapply(contrasts,length)
+ if (all(ll==l)) return(contrasts)
+
+ if (l==0) stop("No names to match")
+ if( !all( unlist(sapply(contrasts,names)) %in% nms))
+ stop("names not matched")
+
+ lapply(contrasts,
+ function(con) {
+ r<-numeric(l)
+ names(r)<-nms
+ r[names(con)]<-con
+ r
+ })
+
+}
+
+contrast<-function(coef,var,contrasts){
+ nas<-is.na(var[,1])
+ drop<-nas & apply(contrasts,2,function(v) all(v==0))
+ if(any(drop)){
+ contrasts<-contrasts[,!drop,drop=FALSE]
+ coef<-coef[!drop]
+ var<-var[!drop,!drop,drop=FALSE]
+ }
+ if (any(is.na(coef))){
+ badin<-is.na(coef)
+ bad<-((contrasts!=0)%*%is.na(coef))>0
+ rval<-rep(NA,NROW(contrasts))
+ rval[!bad]<-contrasts[!bad,!badin,drop=FALSE]%*%coef[!badin]
+ v<-matrix(NA,length(rval),length(rval))
+ v[!bad,!bad]<-contrasts[!bad,!badin,drop=FALSE]%*%var[!badin,!badin,drop=FALSE]%*%t(contrasts[!bad,!badin,drop=FALSE])
+ dimnames(v)<-list(names(rval),names(rval))
+ rval<-drop(rval)
+ attr(rval, "var")<-v
+ } else{
+ rval<-drop(contrasts%*%coef)
+ v<-contrasts%*%var%*%t(contrasts)
+ dimnames(v)<-list(names(rval),names(rval))
+ attr(rval,"var")<-v
+ }
+ rval
+}
+
+svycontrast.svystat<-function(stat, contrasts,...){
+ if (!is.list(contrasts))
+ contrasts<-list(contrast=contrasts)
+ if (is.call(contrasts[[1]])){
+ rval<-nlcon(contrasts,as.list(coef(stat)), vcov(stat))
+ class(rval)<-"svrepstat"
+ attr(rval,"statistic")<-"nlcon"
+ return(rval)
+ }
+ contrasts<-match.names(names(coef(stat)),contrasts)
+ contrasts<-do.call(rbind,contrasts)
+ coef<-contrast(coef(stat),vcov(stat),contrasts)
+ class(coef)<-"svystat"
+ attr(coef,"statistic")<-"contrast"
+ coef
+}
+
+svycontrast.svyolr<-function(stat, contrasts,...){
+ if (!is.list(contrasts))
+ contrasts<-list(contrast=contrasts)
+ if (is.call(contrasts[[1]])){
+ rval<-nlcon(contrasts,as.list(c(coef(stat),stat$zeta)), vcov(stat))
+ class(rval)<-"svystat"
+ attr(rval,"statistic")<-"nlcon"
+ return(rval)
+ }
+ contrasts <- match.names(names(coef(stat)), contrasts)
+ contrasts<-do.call(rbind,contrasts)
+ coef<-contrast(as.vector(as.matrix(coef(stat))),
+ vcov(stat),contrasts)
+ class(coef)<-"svystat"
+ attr(coef,"statistic")<-"contrast"
+ coef
+}
+
+
+svycontrast.svyglm<-svycontrast.svystat
+svycontrast.svycoxph<-svycontrast.svystat
+svycontrast.svyby<-svycontrast.svystat
+svycontrast.default<-svycontrast.svystat
+
+svycontrast.svrepstat<-function(stat, contrasts,...){
+ if (!is.list(contrasts))
+ contrasts<-list(contrast=contrasts)
+ if (is.call(contrasts[[1]])){
+ if (is.list(stat)){ ##replicates
+ rval<-list(nlcon=nlcon(contrasts,as.list(coef(stat)),vcov(stat)))
+ colnames(stat$replicates)<-names(coef(stat))
+ rval$replicates<-t(apply(stat$replicates,1,
+ function(repi) nlcon(datalist=as.list(repi),
+ exprlist=contrasts, varmat=NULL)))
+ attr(rval$nlcon,"statistic")<-"nlcon"
+ } else {
+ rval<-nlcon(contrasts,as.list(coef(stat)), vcov(stat))
+ attr(rval,"statistic")<-"nlcon"
+ }
+ class(rval)<-"svrepstat"
+ return(rval)
+ }
+ contrasts<-match.names(names(coef(stat)), contrasts)
+ contrasts<-do.call(rbind,contrasts)
+
+ coef<-contrast(coef(stat), vcov(stat), contrasts)
+ if (is.list(stat)){
+ coef<-list(contrast=coef,
+ replicates=tcrossprod(stat$replicates, contrasts)) ## replicates %*% t(contrasts)
+ }
+ class(coef)<-"svrepstat"
+ attr(coef,"statistic")<-"contrast"
+ coef
+}
+
+
+
+nlcon<-function(exprlist, datalist, varmat){
+ if (!is.list(exprlist)) exprlist<-list(contrast=exprlist)
+ dexprlist<-lapply(exprlist,
+ function(expr) deriv(expr, names(datalist))[[1]])
+ values<-lapply(dexprlist,
+ function(dexpr) eval(do.call(substitute, list(dexpr,datalist))))
+ if (is.null(varmat))
+ return(do.call(c,values))
+ jac<-do.call(rbind,lapply(values,
+ function(value) attr(value,"gradient")))
+ var<-jac%*%varmat%*%t(jac)
+ values<-do.call(c, values)
+ dimnames(var)<-list(names(values),names(values))
+ attr(values, "var")<-var
+ values
+}
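+
+## Usage sketch (illustrative, guarded by if(FALSE)): linear contrasts are
+## vectors (optionally named and partial, matched by match.names); nonlinear
+## contrasts are quoted expressions, with delta-method variance from nlcon().
+## Assumes the 'api' data shipped with this package.
+if (FALSE) {
+ data(api)
+ dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+ a <- svytotal(~api00+api99, dclus1)
+ svycontrast(a, list(diff=c(1,-1))) ## linear
+ svycontrast(a, list(diff=c(api00=1, api99=-1))) ## named
+ svycontrast(a, quote(api00/api99)) ## nonlinear
+}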
diff --git a/R/stdize.R b/R/stdize.R
new file mode 100644
index 0000000..ea89350
--- /dev/null
+++ b/R/stdize.R
@@ -0,0 +1,24 @@
+##
+## This is how NCHS does it: postStratify to a table in which the population proportions for by= are fixed and then applied within each cell of over=
+##
+svystandardize<-function(design, by, over, population, excluding.missing=NULL){
+
+ if (!is.null(excluding.missing)){
+ mf<-model.frame(excluding.missing, model.frame(design),na.action=na.omit)
+ naa<-attr(mf,"na.action")
+ if(!is.null(naa)) design<-design[-naa,]
+ }
+
+ if(is.data.frame(population)) population<-population$Freq
+
+ freemargins<-as.data.frame(svytable(over, design))
+ fixedmargins<-as.data.frame(svytable(by,design))
+ fixedmargins$Freq<-as.vector(population)/sum(as.vector(population))
+ combined<-make.formula(c(attr(terms(by),"term.labels"), attr(terms(over),"term.labels")))
+ allmargins<-as.data.frame(svytable(combined,design))
+ allmargins$Freq<-as.vector(outer(fixedmargins$Freq, freemargins$Freq))
+
+ design<-postStratify(design, combined, allmargins,partial=TRUE)
+ design$call<-sys.call()
+ design
+}
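+
+## Usage sketch (illustrative, guarded by if(FALSE)): standardize the age
+## distribution within each race*sex cell before estimation. Variable names
+## follow the 'nhanes' data shipped with this package; the population counts
+## here are placeholders, one per level of agecat.
+if (FALSE) {
+ data(nhanes)
+ des <- svydesign(id=~SDMVPSU, strata=~SDMVSTRA, weights=~WTMEC2YR,
+ nest=TRUE, data=nhanes)
+ popage <- c(55901, 77670, 72816, 45364)
+ std <- svystandardize(des, by=~agecat, over=~race+RIAGENDR,
+ population=popage, excluding.missing=~HI_CHOL)
+ svymean(~HI_CHOL, std, na.rm=TRUE)
+}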
diff --git a/R/stratsample.R b/R/stratsample.R
new file mode 100644
index 0000000..95ef84c
--- /dev/null
+++ b/R/stratsample.R
@@ -0,0 +1,13 @@
+stratsample<-function(strata, counts){
+ strata<-as.character(strata)
+ n<-length(strata)
+ rval <- integer(sum(counts))
+ allrows<-1:n
+ j<-0
+ for(i in 1:length(counts)) {
+ thisstrat<-names(counts)[i]
+ rval[j+(1:counts[i])]<-sample(allrows[strata==thisstrat],counts[i])
+ j<-j+counts[i]
+ }
+ rval
+ }
\ No newline at end of file
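+
+## Usage sketch (illustrative, guarded by if(FALSE)): draw a stratified
+## simple random sample of row indices. Assumes the 'api' data shipped with
+## this package.
+if (FALSE) {
+ data(api)
+ s <- stratsample(apistrat$stype, c("E"=5, "H"=4, "M"=3))
+ table(apistrat$stype[s]) ## 5 E, 4 H, 3 M rows
+}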
diff --git a/R/survey.R b/R/survey.R
new file mode 100755
index 0000000..f990040
--- /dev/null
+++ b/R/survey.R
@@ -0,0 +1,2154 @@
+
+make.formula<-function(names) formula(paste("~",paste(names,collapse="+")))
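+## e.g. make.formula(c("api00","api99")) gives ~api00 + api99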
+
+dimnames.survey.design<-function(x) dimnames(x$variables)
+dimnames.svyrep.design<-function(x) dimnames(x$variables)
+dimnames.twophase<-function(x) dimnames(x$phase1$sample$variables)
+
+oldsvydesign<-function(ids,probs=NULL,strata=NULL,variables=NULL, fpc=NULL,
+ data=NULL, nest=FALSE, check.strata=!nest,weights=NULL){
+
+ .Deprecated("svydesign")
+
+ ## less memory-hungry version for sparse tables
+ interaction<-function (..., drop = TRUE) {
+ args <- list(...)
+ narg <- length(args)
+ if (narg == 1 && is.list(args[[1]])) {
+ args <- args[[1]]
+ narg <- length(args)
+ }
+
+ ls<-sapply(args,function(a) length(levels(a)))
+ ans<-do.call("paste",c(lapply(args,as.character),sep="."))
+ ans<-factor(ans)
+ return(ans)
+
+ }
+
+
+ na.failsafe<-function(object,...){
+ if (NCOL(object)==0)
+ object
+ else na.fail(object)
+ }
+
+ if(inherits(ids,"formula")) {
+ mf<-substitute(model.frame(ids,data=data,na.action=na.failsafe))
+ ids<-eval.parent(mf)
+ } else if (!is.null(ids))
+ ids<-na.fail(data.frame(ids))
+
+ if(inherits(probs,"formula")){
+ mf<-substitute(model.frame(probs,data=data,na.action=na.failsafe))
+ probs<-eval.parent(mf)
+ }
+
+ if(inherits(weights,"formula")){
+ mf<-substitute(model.frame(weights,data=data,na.action=na.failsafe))
+ weights<-eval.parent(mf)
+ } else if (!is.null(weights))
+ weights<-na.fail(data.frame(weights))
+
+ if(!is.null(weights)){
+ if (!is.null(probs))
+ stop("Can't specify both sampling weights and probabilities")
+ else
+ probs<-1/weights
+ }
+
+
+
+ if (!is.null(strata)){
+ if(inherits(strata,"formula")){
+ mf<-substitute(model.frame(strata,data=data, na.action=na.failsafe))
+ strata<-eval.parent(mf)
+ }
+ if(is.list(strata))
+ strata<-na.fail(do.call("interaction", strata))
+ if (!is.factor(strata))
+ strata<-factor(strata)
+ has.strata<-TRUE
+ } else {
+ strata<-factor(rep(1,NROW(ids)))
+ has.strata <-FALSE
+ }
+
+ if (inherits(variables,"formula")){
+ mf<-substitute(model.frame(variables,data=data,na.action=na.pass))
+ variables <- eval.parent(mf)
+ } else if (is.null(variables)){
+ variables<-data
+ } else
+ variables<-data.frame(variables)
+
+
+ if (inherits(fpc,"formula")){
+ mf<-substitute(model.frame(fpc,data=data,na.action=na.failsafe))
+ fpc<-eval.parent(mf)
+ if (length(fpc))
+ fpc<-fpc[,1]
+ }
+
+ if (is.null(ids) || NCOL(ids)==0)
+ ids<-data.frame(.id=seq(length=NROW(variables)))
+
+ ## force subclusters nested in clusters
+ if (nest && NCOL(ids)>1){
+ N<-ncol(ids)
+ for(i in 2:(N)){
+ ids[,i]<-do.call("interaction", ids[,1:i,drop=TRUE])
+ }
+ }
+ ## force clusters nested in strata
+ if (nest && has.strata && NCOL(ids)){
+ N<-NCOL(ids)
+ for(i in 1:N)
+ ids[,i]<-do.call("interaction", list(strata, ids[,i]))
+ }
+
+ ## check if clusters nested in strata
+ if (check.strata && nest)
+ warning("No point in check.strata=TRUE if nest=TRUE")
+ if(check.strata && !is.null(strata) && NCOL(ids)){
+ sc<-rowSums(table(ids[,1],strata)>0)
+ if(any(sc>1)) stop("Clusters not nested in strata")
+ }
+
+ ## Put degrees of freedom (# of PSUs in each stratum) in object, to
+ ## allow subpopulations
+ if (NCOL(ids)){
+ nPSU<-table(strata[!duplicated(ids[,1])])
+ }
+
+
+ if (!is.null(fpc)){
+
+ if (NCOL(ids)>1){
+ if (all(fpc<1))
+ warning("FPC is not currently supported for multi-stage sampling")
+ else
+ stop("Can't compute FPC from population size for multi-stage sampling")
+ }
+
+ ## Finite population correction: specified per observation
+ if (is.numeric(fpc) && length(fpc)==NROW(variables)){
+ tbl<-by(fpc,list(strata),unique)
+ if (any(sapply(tbl,length)!=1))
+ stop("fpc not constant within strata")
+ fpc<-data.frame(strata=factor(rownames(tbl),levels=levels(strata)),
+ N=as.vector(tbl))
+ }
+ ## Now reduced to fpc per stratum
+ nstr<-table(strata[!duplicated(ids[[1]])])
+
+ if (all(fpc[,2]<=1)){
+ fpc[,2]<- nstr[match(as.character(fpc[,1]), names(nstr))]/fpc[,2]
+ } else if (any(fpc[,2]<nstr[match(as.character(fpc[,1]), names(nstr))]))
+ stop("Over 100% sampling in some strata")
+
+ }
+
+ ## if FPC specified, but no weights, use it for weights
+ if (is.null(probs) && is.null(weights) && !is.null(fpc)){
+ pstr<-nstr[match(as.character(fpc[,1]), names(nstr))]/fpc[,2]
+ probs<-pstr[match(as.character(strata),as.character(fpc[,1]))]
+ probs<-as.vector(probs)
+ }
+
+
+ certainty<-rep(FALSE,length(unique(strata)))
+ names(certainty)<-as.character(unique(strata))
+ if (any(nPSU==1)){
+ ## lonely PSUs: are they certainty PSUs?
+ if (!is.null(fpc)){
+ certainty<- fpc$N < 1.01
+ names(certainty)<-as.character(fpc$strata)
+ } else if (all(as.vector(probs)<=1)){
+ certainty<- !is.na(match(as.character(unique(strata)),as.character(strata)[probs > 0.99]))
+ names(certainty)<-as.character(unique(strata))
+ } else {
+ warning("Some strata have only one PSU and I can't tell if they are certainty PSUs")
+ }
+
+ }
+
+ if (is.numeric(probs) && length(probs)==1)
+ probs<-rep(probs, NROW(variables))
+
+ if (length(probs)==0) probs<-rep(1,NROW(variables))
+
+ if (NCOL(probs)==1) probs<-data.frame(probs)
+
+ rval<-list(cluster=ids)
+ rval$strata<-strata
+ rval$has.strata<-has.strata
+ rval$prob<- apply(probs,1,prod)
+ rval$allprob<-probs
+ rval$call<-match.call()
+ rval$variables<-variables
+ rval$fpc<-fpc
+ rval$certainty<-certainty
+ rval$call<-sys.call()
+ rval$nPSU<-nPSU
+ class(rval)<-"survey.design"
+ rval
+ }
+
+print.survey.design<-function(x,varnames=FALSE,design.summaries=FALSE,...){
+ .svycheck(x)
+ n<-NROW(x$cluster)
+ if (x$has.strata) cat("Stratified ")
+ un<-length(unique(x$cluster[,1]))
+ if(n==un){
+ cat("Independent Sampling design\n")
+ is.independent<-TRUE
+ } else {
+ cat(NCOL(x$cluster),"- level Cluster Sampling design\n")
+ nn<-lapply(x$cluster,function(i) length(unique(i)))
+ cat(paste("With (",paste(unlist(nn),collapse=", "),") clusters.\n",sep=""))
+ is.independent<-FALSE
+ }
+ print(x$call)
+ if (design.summaries){
+ cat("Probabilities:\n")
+ print(summary(x$prob))
+ if(x$has.strata){
+ cat("Stratum sizes: \n")
+ a<-rbind(obs=table(x$strata),
+ design.PSU=x$nPSU,
+ actual.PSU=if(!is.independent || !is.null(x$fpc))
+ table(x$strata[!duplicated(x$cluster[,1])]))
+ print(a)
+ }
+ if (!is.null(x$fpc)){
+ if (x$has.strata) {
+ cat("Population stratum sizes (PSUs): \n")
+ print(x$fpc)
+ } else {
+ cat("Population size (PSUs):",x$fpc[,2],"\n")
+ }
+ }
+ }
+ if (varnames){
+ cat("Data variables:\n")
+ print(names(x$variables))
+ }
+ invisible(x)
+}
+
+"[.survey.design"<-function (x,i, ...){
+
+ if (!missing(i)){
+ if (is.calibrated(x)){
+ tmp<-x$prob[i,]
+ x$prob<-rep(Inf, length(x$prob))
+ x$prob[i,]<-tmp
+ } else {
+ x$variables<-"[.data.frame"(x$variables,i,...,drop=FALSE)
+ x$cluster<-x$cluster[i,,drop=FALSE]
+ x$prob<-x$prob[i]
+ x$allprob<-x$allprob[i,,drop=FALSE]
+ x$strata<-x$strata[i]
+ }
+ } else {
+ x$variables<-x$variables[,...,drop=FALSE]
+ }
+
+ x
+}
+
+"[<-.survey.design"<-function(x, ...,value){
+ if (inherits(value, "survey.design"))
+ value<-value$variables
+ x$variables[...]<-value
+ x
+}
+
+dim.survey.design<-function(x){
+ dim(x$variables)
+}
+
+na.fail.survey.design<-function(object,...){
+ tmp<-na.fail(object$variables,...)
+ object
+}
+
+na.omit.survey.design<-function(object,...){
+ tmp<-na.omit(object$variables,...)
+ omit<-attr(tmp,"na.action")
+ if (length(omit)){
+ object<-object[-omit,]
+ object$variables<-tmp
+ attr(object,"na.action")<-omit
+ }
+ object
+}
+
+na.exclude.survey.design<-function(object,...){
+ tmp<-na.exclude(object$variables,...)
+ exclude<-attr(tmp,"na.action")
+ if (length(exclude)){
+ object<-object[-exclude,]
+ object$variables<-tmp
+ attr(object,"na.action")<-exclude
+ }
+ object
+}
+
+
+update.survey.design<-function(object,...){
+
+ dots<-substitute(list(...))[-1]
+ newnames<-names(dots)
+
+ for(j in seq(along=dots)){
+ object$variables[,newnames[j]]<-eval(dots[[j]],object$variables, parent.frame())
+ }
+
+ object$call<-sys.call(-1)
+ object
+}
+
+
+subset.survey.design<-function(x,subset,...){
+ e <- substitute(subset)
+ r <- eval(e, x$variables, parent.frame())
+ r <- r & !is.na(r)
+ x<-x[r,]
+ x$call<-sys.call(-1)
+ x
+}
+
+summary.survey.design<-function(object,...){
+ class(object)<-"summary.survey.design"
+ object
+}
+
+print.summary.survey.design<-function(x,...){
+ y<-x
+ class(y)<-"survey.design"
+ print(y,varnames=TRUE,design.summaries=TRUE,...)
+}
+
+postStratify.survey.design<-function(design, strata, population, partial=FALSE,...){
+
+ if(inherits(strata,"formula")){
+ mf<-substitute(model.frame(strata, data=design$variables,na.action=na.fail))
+ strata<-eval.parent(mf)
+ }
+ strata<-as.data.frame(strata)
+
+ sampletable<-xtabs(I(1/design$prob)~.,data=strata)
+ sampletable<-as.data.frame(sampletable)
+
+ if (inherits(population,"table"))
+ population<-as.data.frame(population)
+ else if (is.data.frame(population))
+ population$Freq <- as.vector(population$Freq) ##allows Freq computed by tapply()
+ else
+ stop("population must be a table or dataframe")
+
+ if (!all(names(strata) %in% names(population)))
+ stop("Stratifying variables don't match")
+ nn<- names(population) %in% names(strata)
+ if (sum(!nn)!=1)
+ stop("stratifying variables don't match")
+
+ names(population)[which(!nn)]<-"Pop.Freq"
+
+ both<-merge(sampletable, population, by=names(strata), all=TRUE)
+
+ samplezero <- both$Freq %in% c(0, NA)
+ popzero <- both$Pop.Freq %in% c(0, NA)
+ both<-both[!(samplezero & popzero),]
+
+ if (any(onlysample<- popzero & !samplezero)){
+ print(both[onlysample,])
+ stop("Strata in sample absent from population. This Can't Happen")
+ }
+ if (any(onlypop <- samplezero & !popzero)){
+ if (partial){
+ both<-both[!onlypop,]
+ warning("Some strata absent from sample: ignored")
+ } else {
+ print(both[onlypop,])
+ stop("Some strata absent from sample: use partial=TRUE to ignore them.")
+ }
+ }
+
+ reweight<-both$Pop.Freq/both$Freq
+ both$label <- do.call("interaction", list(both[,names(strata)]))
+ designlabel <- do.call("interaction", strata)
+ index<-match(designlabel, both$label)
+
+ attr(index,"oldweights")<-1/design$prob
+ design$prob<-design$prob/reweight[index]
+ attr(index,"weights")<-1/design$prob
+ design$postStrata<-c(design$postStrata,list(index))
+
+ ## Do we need to iterate here a la raking to get design strata
+ ## and post-strata both balanced?
+ design$call<-sys.call(-1)
+
+ design
+}
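+
+## Usage sketch (illustrative, guarded by if(FALSE)): reweight to known
+## population counts of school type, as in the postStratify documentation.
+## Assumes the 'api' data shipped with this package.
+if (FALSE) {
+ data(api)
+ dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+ pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
+ rclus1 <- postStratify(dclus1, ~stype, pop.types)
+ svymean(~api00, rclus1)
+}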
+
+
+svyCprod<-function(x, strata, psu, fpc, nPSU, certainty=NULL, postStrata=NULL,
+ lonely.psu=getOption("survey.lonely.psu")){
+
+ x<-as.matrix(x)
+ n<-NROW(x)
+
+ ## Remove post-stratum means, which may cut across PSUs
+ if(!is.null(postStrata)){
+ for (psvar in postStrata){
+ if (inherits(psvar, "greg_calibration") || inherits(psvar, "raking"))
+ stop("rake() and calibrate() not supported for old-style design objects")
+ psw<-attr(psvar,"weights")
+ psmeans<-rowsum(x/psw,psvar,reorder=TRUE)/as.vector(table(factor(psvar)))
+ x<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw
+ }
+ }
+
+ ##First collapse over PSUs
+
+ if (is.null(strata)){
+ strata<-rep("1",n)
+ if (!is.null(nPSU))
+ names(nPSU)<-"1"
+ }
+ else
+ strata<-as.character(strata) ##can't use factors as indices in for()
+
+ if (is.null(certainty)){
+ certainty<-rep(FALSE,length(strata))
+ names(certainty)<-strata
+ }
+
+ if (!is.null(psu)){
+ x<-rowsum(x, psu, reorder=FALSE)
+ strata<-strata[!duplicated(psu)]
+ n<-NROW(x)
+ }
+
+ if (!is.null(nPSU)){
+ obsn<-table(strata)
+ dropped<-nPSU[match(names(obsn),names(nPSU))]-obsn
+ if(sum(dropped)){
+ xtra<-matrix(0,ncol=NCOL(x),nrow=sum(dropped))
+ strata<-c(strata,rep(names(dropped),dropped))
+ if(is.matrix(x))
+ x<-rbind(x,xtra)
+ else
+ x<-c(x,xtra)
+ n<-NROW(x)
+ }
+ } else obsn<-table(strata)
+
+ if(is.null(strata)){
+ x<-t(t(x)-colMeans(x))
+ } else {
+ strata.means<-drop(rowsum(x,strata, reorder=FALSE))/drop(rowsum(rep(1,n),strata, reorder=FALSE))
+ if (!is.matrix(strata.means))
+ strata.means<-matrix(strata.means, ncol=NCOL(x))
+ x<- x- strata.means[ match(strata, unique(strata)),,drop=FALSE]
+ }
+
+ p<-NCOL(x)
+ v<-matrix(0,p,p)
+
+ ss<-unique(strata)
+ for(s in ss){
+ this.stratum <- strata %in% s
+
+ ## original number of PSUs in this stratum
+ ## before missing data/subsetting
+ this.n <-nPSU[match(s,names(nPSU))]
+
+ this.df <- this.n/(this.n-1)
+
+ if (is.null(fpc))
+ this.fpc <- 1
+ else{
+ this.fpc <- fpc[,2][ fpc[,1]==as.character(s)]
+ this.fpc <- (this.fpc - this.n)/this.fpc
+ }
+
+ xs<-x[this.stratum,,drop=FALSE]
+
+ this.certain<-certainty[names(certainty) %in% s]
+
+ ## stratum with only 1 design cluster leads to undefined variance
+ lonely.psu<-match.arg(lonely.psu, c("remove","adjust","fail",
+ "certainty","average"))
+ if (this.n==1 && !this.certain){
+ this.df<-1
+ if (lonely.psu=="fail")
+ stop("Stratum ",s, " has only one sampling unit.")
+ else if (lonely.psu!="certainty")
+ warning("Stratum ",s, " has only one sampling unit.")
+ if (lonely.psu=="adjust")
+ xs<-strata.means[match(s,ss),,drop=FALSE]
+ } else if (obsn[match(s,names(obsn))]==1 && !this.certain){
+ ## stratum with only 1 cluster left after subsetting
+ warning("Stratum ",s," has only one PSU in this subset.")
+ if (lonely.psu=="adjust")
+ xs<-strata.means[match(s,ss),,drop=FALSE]
+ }
+ ## add it up
+ if (!this.certain)
+ v<-v+crossprod(xs)*this.df*this.fpc
+ }
+ if (lonely.psu=="average"){
+ v<- v/(1-mean(obsn==1 & !certainty))
+ }
+ v
+}
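+
+## Note (illustrative): the lonely.psu default is read from
+## options("survey.lonely.psu"); for example
+## options(survey.lonely.psu="adjust")
+## centers single-PSU strata at the grand mean instead of failing.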
+
+
+
+svymean<-function(x, design,na.rm=FALSE,...){
+ .svycheck(design)
+ UseMethod("svymean",design)
+}
+
+svymean.survey.design<-function(x,design, na.rm=FALSE,deff=FALSE,...){
+
+ if (!inherits(design,"survey.design"))
+ stop("design is not a survey design")
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,design$variables,na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ }
+ else if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ x<-x[nas==0,,drop=FALSE]
+ }
+
+ pweights<-1/design$prob
+ psum<-sum(pweights)
+ average<-colSums(x*pweights/psum)
+ x<-sweep(x,2,average)
+ v<-svyCprod(x*pweights/psum,design$strata,design$cluster[[1]], design$fpc,
+ design$nPSU,design$certainty, design$postStrata)
+ attr(average,"var")<-v
+ attr(average,"statistic")<-"mean"
+ class(average)<-"svystat"
+ if (is.character(deff) || deff){
+ nobs<-NROW(design$cluster)
+ vsrs<-svyvar(x,design,na.rm=na.rm)/nobs
+ vsrs<-vsrs*(psum-nobs)/psum
+ attr(average, "deff")<-v/vsrs
+ }
+
+ return(average)
+}
+
+
+print.svystat<-function(x,...){
+ vv<-attr(x,"var")
+ if (is.matrix(vv))
+ m<-cbind(x,sqrt(diag(vv)))
+ else
+ m<-cbind(x,sqrt(vv))
+ hasdeff<-!is.null(attr(x,"deff"))
+ if (hasdeff) {
+ m<-cbind(m,deff(x))
+ colnames(m)<-c(attr(x,"statistic"),"SE","DEff")
+ } else {
+ colnames(m)<-c(attr(x,"statistic"),"SE")
+ }
+ printCoefmat(m)
+}
+
+as.data.frame.svystat<-function(x,...){
+ rval<-data.frame(statistic=coef(x),SE=SE(x))
+ names(rval)[1]<-attr(x,"statistic")
+ if (!is.null(attr(x,"deff")))
+ rval<-cbind(rval,deff=deff(x))
+ rval
+}
+
+coef.svystat<-function(object,...){
+ attr(object,"statistic")<-NULL
+ attr(object,"deff")<-NULL
+ attr(object,"var")<-NULL
+ unclass(object)
+}
+
+vcov.svystat<-function(object,...){
+ as.matrix(attr(object,"var"))
+}
+
+SE.svystat<-function(object,...){
+ v<-vcov(object)
+ if (!is.matrix(v) || NCOL(v)==1) sqrt(v) else sqrt(diag(v))
+}
+
+deff <- function(object,quietly=FALSE,...) UseMethod("deff")
+
+deff.default <- function(object, quietly=FALSE,...){
+ rval<-attr(object,"deff")
+ if (is.null(rval)) {
+ if(!quietly)
+ warning("object has no design effect information")
+ } else rval<-diag(as.matrix(rval))
+ rval
+}
+
+cv<-function(object,...) UseMethod("cv")
+
+cv.default<-function(object, warn=TRUE, ...){
+ rval<-SE(object)/coef(object)
+ if (warn && any(coef(object)<0,na.rm=TRUE)) warning("CV may not be useful for negative statistics")
+ rval
+}
+
+
+svytotal<-function(x,design,na.rm=FALSE,...){
+ .svycheck(design)
+ UseMethod("svytotal",design)
+}
+svytotal.survey.design<-function(x,design, na.rm=FALSE, deff=FALSE,...){
+
+ if (!inherits(design,"survey.design"))
+ stop("design is not a survey design")
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,design$variables,na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ } else if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ x<-x[nas==0,,drop=FALSE]
+ }
+
+ N<-sum(1/design$prob)
+ m <- svymean(x, design, na.rm=na.rm)
+ total<-m*N
+ attr(total, "var")<-v<-svyCprod(x/design$prob,design$strata,
+ design$cluster[[1]], design$fpc,
+ design$nPSU,design$certainty,design$postStrata)
+ attr(total,"statistic")<-"total"
+ if (is.character(deff) || deff){
+ vsrs<-svyvar(x,design)*sum(weights(design)^2)
+ vsrs<-vsrs*(N-NROW(design$cluster))/N
+ attr(total,"deff")<-v/vsrs
+ }
+ return(total)
+}
+
+svyvar<-function(x, design, na.rm=FALSE,...){
+ .svycheck(design)
+ UseMethod("svyvar",design)
+}
+svyvar.survey.design<-function(x, design, na.rm=FALSE,...){
+
+ if (inherits(x,"formula"))
+ x<-model.frame(x,model.frame(design),na.action=na.pass)
+ else if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+
+ n<-sum(weights(design,"sampling")!=0)
+ xbar<-svymean(x,design, na.rm=na.rm)
+ if(NCOL(x)==1) {
+ x<-x-xbar
+ v<-svymean(x*x*n/(n-1),design, na.rm=na.rm)
+ attr(v,"statistic")<-"variance"
+ return(v)
+ }
+ x<-t(t(x)-xbar)
+ p<-NCOL(x)
+ a<-matrix(rep(x,p),ncol=p*p)
+ b<-x[,rep(1:p,each=p)]
+ ## Kish uses the n-1 divisor, so it affects design effects
+ v<-svymean(a*b*n/(n-1),design, na.rm=na.rm)
+ vv<-matrix(v,ncol=p)
+ dimnames(vv)<-list(names(xbar),names(xbar))
+ attr(vv,"var")<-attr(v,"var")
+ attr(vv,"statistic")<-"variance"
+ class(vv)<-c("svyvar","svystat")
+ vv
+ }
+
+print.svyvar<-function (x, covariance=FALSE, ...)
+{
+ if(!is.matrix(x)) NextMethod()
+
+ vv <- attr(x, "var")
+ if (covariance){
+ nms<-outer(rownames(x),colnames(x),paste,sep=":")
+ m<-cbind(as.vector(x), sqrt(diag(vv)))
+ rownames(m)<-nms
+ } else{
+ ii <- which(diag(sqrt(length(x)))>0) ## column-major positions of the diagonal (variance) entries
+ m <- cbind(x[ii], sqrt(diag(vv))[ii])
+ }
+ colnames(m) <- c(attr(x, "statistic"), "SE")
+ printCoefmat(m)
+}
+
+as.matrix.svyvar<-function(x,...) unclass(x)
+
+svyquantile<-function(x,design,quantiles,...) UseMethod("svyquantile", design)
+
+svyquantile.survey.design<-function(x,design,quantiles,alpha=0.05,
+ ci=FALSE, method="linear",f=1,
+ interval.type=c("Wald","score","betaWald"),
+ na.rm=FALSE,se=ci, ties=c("discrete","rounded"), df=Inf,...){
+ if (inherits(x,"formula"))
+ x<-model.frame(x ,model.frame(design), na.action=na.pass)
+ else if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, model.frame(design,na.action=na.pass))
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ if (length(nas)>length(design$prob))
+ x<-x[nas==0,,drop=FALSE]
+ else
+ x[nas>0,]<-0
+ }
+
+
+ w<-weights(design)
+
+ if (is.null(df)){
+ qcrit<-function(p, lower.tail=TRUE) qt(p, df=degf(design), lower.tail=lower.tail)
+ } else if(df==Inf){
+ qcrit <- function(p,lower.tail=TRUE) qnorm(p,lower.tail=lower.tail)
+ } else {
+ qcrit <- function(p,lower.tail=TRUE) qt(p,df=df,lower.tail=lower.tail)
+ }
+
+
+ computeQuantiles<-function(xx,p=quantiles){
+ if (any(is.na(xx))) return(NA*p)
+ oo<-order(xx)
+ cum.w<-cumsum(w[oo])/sum(w)
+ cdf<-approxfun(cum.w,xx[oo],method=method,f=f,
+ yleft=min(xx),yright=max(xx),ties=min)
+ cdf(p)
+ }
+
+ computeQuantilesRounded<-function(xx,p=quantiles){
+ if (any(is.na(xx))) return(NA*p)
+ ww<-rowsum(w,xx,reorder=TRUE)
+ xx<-sort(unique(xx))
+ cum.w <- cumsum(ww)/sum(ww)
+ cdf <- approxfun(cum.w, xx, method = method, f = f,
+ yleft = min(xx), yright = max(xx),ties=min)
+ cdf(p)
+ }
+
+
+
+ computeScoreCI<-function(xx,p){
+ if (any(is.na(xx))) return(c(NA,NA))
+
+ U<-function(theta){ ((xx>theta)-(1-p))}
+
+ scoretest<-function(theta,qlimit){
+ umean<-svymean(U(theta),design)
+ umean/sqrt(attr(umean,"var"))-qlimit
+ }
+
+ iqr<-IQR(xx)
+ lower<-min(xx)+iqr/100
+ upper<-max(xx)-iqr/100
+ tol<-1/(100*sqrt(nrow(design)))
+ c(uniroot(scoretest,interval=c(lower,upper),
+ qlimit=qcrit(alpha/2,lower.tail=FALSE),tol=tol)$root,
+ uniroot(scoretest,interval=c(lower,upper),
+ qlimit=qcrit(alpha/2,lower.tail=TRUE),tol=tol)$root)
+ }
+
+ computePCI<-function(se,alpha,p){
+ if (interval.type=="Wald"){
+ p.up<-p+qcrit(alpha/2,lower.tail=FALSE)*se
+ p.low<-p+qcrit(alpha/2,lower.tail=TRUE)*se
+ c(p.low,p.up)
+ } else if (interval.type=="betaWald"){
+ n.eff <- (p*(1-p))/(se^2)
+ n.eff <- n.eff * ( qt(alpha/2, nrow(design)-1)/qt(alpha/2, degf(design)) )^2
+ p.up<-qbeta(1-alpha/2, n.eff*p+1, n.eff*(1-p))
+ p.low<-qbeta(alpha/2, n.eff*p, n.eff*(1-p)+1)
+ c(p.low,p.up)
+ }
+
+ }
+
+ computeWaldCI<-function(xx,p){
+ if (any(is.na(xx))) return(c(NA,NA))
+ theta0<-computeQuantiles(xx,p)
+ U<- ((xx>theta0)-(1-p))
+ wtest<-svymean(U,design)
+ p.ci<-computePCI(SE(wtest),alpha,p)
+ p.low<-p.ci[1]
+ p.up<-p.ci[2]
+ oo<-order(xx)
+ cum.w<-cumsum(w[oo])/sum(w)
+ approx(cum.w,xx[oo],xout=c(p.low,p.up), method=method,f=f,
+ yleft=min(xx),yright=max(xx),ties=min)$y
+
+ }
+
+ computeWaldCIRounded<-function(xx,p){
+ if(any(is.na(xx))) return(c(NA,NA))
+ theta0<-computeQuantilesRounded(xx,p)
+ U<- ((xx>theta0)-(1-p))
+ ww<-rowsum(w,xx, reorder=TRUE)
+ uxx <- sort(unique(xx))
+ wtest<-svymean(U,design)
+ p.ci<-computePCI(SE(wtest),alpha,p)
+ p.low<-p.ci[1]
+ p.up<-p.ci[2]
+ oo<-order(xx)
+ cum.w<-cumsum(ww)/sum(ww)
+ approx(cum.w,uxx,xout=c(p.low,p.up), method=method,f=f,
+ yleft=min(xx),yright=max(xx),ties=min)$y
+
+ }
+
+ ties<-match.arg(ties)
+ computeQ<-switch(ties, discrete=computeQuantiles,rounded=computeQuantilesRounded)
+
+ if (!is.null(dim(x)))
+ rval<-t(matrix(apply(x,2,computeQ),nrow=length(quantiles),
+ dimnames=list(as.character(round(quantiles,2)),colnames(x))))
+ else
+ rval<-computeQ(x)
+
+ if (!ci & !se) return(rval)
+
+ interval.type<-match.arg(interval.type)
+
+ computeCI<-switch(paste(interval.type,ties,sep="."), score.discrete=computeScoreCI,
+ score.rounded=stop("ties=\"rounded\" not available with interval.type=\"score\""),
+ Wald.rounded=computeWaldCIRounded,
+ betaWald.rounded=computeWaldCIRounded,
+ Wald.discrete=computeWaldCI,
+ betaWald.discrete=computeWaldCI)
+
+ if (!is.null(dim(x)))
+ cis<-array(apply(x,2,function(xx) sapply(quantiles,function(qq) computeCI(xx,qq))),
+ dim=c(2,length(quantiles),ncol(x)),
+ dimnames=list(c("(lower","upper)"),
+ as.character(round(quantiles,2)),
+ colnames(x)))
+ else
+ cis<-sapply(quantiles, function(qq) computeCI(x,qq))
+
+ if (ci)
+ rval<-list(quantiles=rval,CIs=cis)
+ else
+ rval<-list(quantiles=rval)
+
+ if (is.null(dim(x)))
+ ses<-(cis[2,]-cis[1,])/(2*qcrit(alpha/2,lower.tail=FALSE))
+ else
+ ses<-(cis[2,,]-cis[1,,])/(2*qcrit(alpha/2,lower.tail=FALSE))
+ attr(rval,"SE")<-ses
+ class(rval)<-"svyquantile"
+ rval
+ }
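+
+## Usage sketch (illustrative, guarded by if(FALSE)): weighted quantiles with
+## Wald or score confidence intervals. Assumes the 'api' data shipped with
+## this package.
+if (FALSE) {
+ data(api)
+ dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+ svyquantile(~api00, dclus1, quantiles=c(0.25, 0.5, 0.75), ci=TRUE)
+ svyquantile(~api00, dclus1, quantiles=0.5, ci=TRUE, interval.type="score")
+}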
+
+SE.svyquantile<-function(object,...){
+ attr(object,"SE")
+}
+
+vcov.svyquantile<-function(object,...){
+ se<-SE(object)
+ if (is.null(se)) stop("no uncertainty information present")
+ v<-matrix(NA,length(se),length(se))
+ warning("Only diagonal of vcov() available")
+ diag(v)<-se^2
+ v
+}
+
+coef.svyquantile<-function(object,...){
+ rval<-as.vector(object$quantiles)
+ if(ncol(object$quantiles)==1)
+ names(rval)<-rownames(object$quantiles)
+ else if (nrow(object$quantiles)==1)
+ names(rval)<-colnames(object$quantiles)
+ else names(rval)<-t(outer(colnames(object$quantiles),
+ rownames(object$quantiles),
+ paste,sep=":"))
+ rval
+}
+
+print.svyquantile<-function(x,...){
+ print(list(quantiles=x$quantiles, CIs=x$CIs))
+}
+
+coef.svyratio<-function(object,...,drop=TRUE){
+ if (!drop) return(object$ratio)
+ cf<-as.vector(object$ratio)
+ nms<-as.vector(outer(rownames(object$ratio),colnames(object$ratio),paste,sep="/"))
+ names(cf)<-nms
+ cf
+}
+
+SE.svyratio<-function(object,...,drop=TRUE){
+ if(!drop) return(sqrt(object$var))
+ se<-as.vector(sqrt(object$var))
+ nms<-as.vector(outer(rownames(object$ratio),colnames(object$ratio),paste,sep="/"))
+ names(se)<-nms
+ se
+}
+
+svyratio<-function(numerator,denominator, design,...){
+ .svycheck(design)
+ UseMethod("svyratio",design)
+}
+
+svyratio.survey.design<-function(numerator, denominator, design,...){
+
+ if (inherits(numerator,"formula"))
+ numerator<-model.frame(numerator,design$variables)
+ else if(typeof(numerator) %in% c("expression","symbol"))
+ numerator<-eval(numerator, design$variables)
+ if (inherits(denominator,"formula"))
+ denominator<-model.frame(denominator,design$variables)
+ else if(typeof(denominator) %in% c("expression","symbol"))
+ denominator<-eval(denominator, design$variables)
+
+ nn<-NCOL(numerator)
+ nd<-NCOL(denominator)
+
+ all<-cbind(numerator,denominator)
+ allstats<-svytotal(all,design)
+ rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/"))
+
+
+ vars<-matrix(ncol=nd,nrow=nn)
+ for(i in 1:nn){
+ for(j in 1:nd){
+ r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob)
+ vars[i,j]<-svyCprod(r*1/design$prob, design$strata, design$cluster[[1]], design$fpc,
+ design$nPSU, design$certainty,design$postStrata)
+ }
+ }
+ colnames(vars)<-names(denominator)
+ rownames(vars)<-names(numerator)
+ rval$var<-vars
+ rval$call<-sys.call()
+ class(rval)<-"svyratio"
+ rval
+
+ }
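+
+## Usage sketch (illustrative, guarded by if(FALSE)): ratio of two totals,
+## then a ratio estimate of a population total via predict(). Assumes the
+## 'api' data shipped with this package; the enrollment total is illustrative.
+if (FALSE) {
+ data(api)
+ dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+ r <- svyratio(~api.stu, ~enroll, dclus1)
+ predict(r, total=3811472)
+}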
+
+print.svyratio_separate<-function(x,...){
+ cat("Stratified ratio estimate: ")
+ if (!is.null(x$call))
+ print(x$call)
+ else if (!is.null(attr(x,"call")))
+ print(attr(x,"call"))
+ for(r in x$ratios) {
+ print(r)
+ }
+ invisible(x)
+}
+
+print.svyratio<-function(x,...){
+ cat("Ratio estimator: ")
+ if (!is.null(x$call))
+ print(x$call)
+ else if(!is.null(attr(x,"call")))
+ print(attr(x,"call"))
+ cat("Ratios=\n")
+ print(x$ratio)
+ cat("SEs=\n")
+ print(sqrt(x$var))
+ invisible(NULL)
+}
+
+predict.svyratio<-function(object, total, se=TRUE,...){
+ if (se)
+ return(list(total=object$ratio*total,se=sqrt(object$var)*total))
+ else
+ return(object$ratio*total)
+}
+
+predict.svyratio_separate<-function(object, total, se=TRUE,...){
+
+ if (length(total)!=length(object$ratios))
+ stop("Number of strata differ in ratio object and totals.")
+ if (!is.null(names(total)) && !is.null(levels(object$strata))){
+ if (!setequal(names(total), levels(object$strata)))
+ warning("Names of strata differ in ratio object and totals")
+ else if (!all(names(total)==levels(object$strata))){
+ warning("Reordering supplied totals to make their names match the ratio object")
+ total<-total[match(levels(object$strata), names(total))]
+ }
+ }
+ totals<-mapply(predict, object=object$ratios, total=total,se=se,...,SIMPLIFY=FALSE)
+
+ if(se){
+ rval<-totals[[1]]$total
+ v<-totals[[1]]$se^2
+ for(ti in totals[-1]) {
+ rval<-rval+ti$total
+ v<-v+ti$se^2
+ }
+ list(total=rval,se=sqrt(v))
+ } else {
+ rval<-totals[[1]]
+ for (ti in totals[-1]) rval<-rval+ti
+ rval
+ }
+
+}
+
+
+cv.svyratio<-function(object,...){
+ sqrt(object$var)/object$ratio
+}
+
+svytable<-function(formula, design, ...){
+ UseMethod("svytable",design)
+}
+
+svytable.survey.design<-function(formula, design, Ntotal=NULL, round=FALSE,...){
+
+ if (!inherits(design,"survey.design")) stop("design must be a survey design")
+ weights<-1/design$prob
+
+ ## unstratified or unadjusted
+ if (length(Ntotal)<=1 || !design$has.strata){
+ if (length(formula)==3)
+ tblcall<-bquote(xtabs(I(weights*.(formula[[2]]))~.(formula[[3]]), data=model.frame(design),...))
+ else
+ tblcall<-bquote(xtabs(weights~.(formula[[2]]), data=model.frame(design),...))
+ tbl<-eval(tblcall)
+ if (!is.null(Ntotal)) {
+ if(length(formula)==3)
+ tbl<-tbl/sum(Ntotal)
+ else
+ tbl<-tbl*sum(Ntotal)/sum(tbl)
+ }
+ if (round)
+ tbl<-round(tbl)
+ attr(tbl,"call")<-match.call()
+ class(tbl)<-c("svytable",class(tbl))
+ return(tbl)
+ }
+ ## adjusted and stratified
+ if (length(formula)==3)
+ tblcall<-bquote(xtabs(I(weights*.(formula[[2]]))~design$strata[,1]+.(formula[[3]]), data=model.frame(design),...))
+ else
+ tblcall<-bquote(xtabs(weights~design$strata[,1]+.(formula[[2]]), data=model.frame(design),...))
+
+ tbl<-eval(tblcall)
+
+ ss<-match(sort(unique(design$strata[,1])), Ntotal[,1])
+ dm<-dim(tbl)
+ layer<-prod(dm[-1])
+ tbl<-sweep(tbl,1,Ntotal[ss, 2]/apply(tbl,1,sum),"*")
+ tbl<-apply(tbl, 2:length(dm), sum)
+ if (round)
+ tbl<-round(tbl)
+ class(tbl)<-c("svytable","xtabs", "table")
+ attr(tbl, "call")<-match.call()
+ tbl
+}
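+
+## Usage sketch (illustrative, guarded by if(FALSE)): weighted crosstabs,
+## optionally rescaled to a given population size (Ntotal=1 gives
+## proportions). Assumes the 'api' data shipped with this package.
+if (FALSE) {
+ data(api)
+ dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+ svytable(~sch.wide+stype, dclus1)
+ svytable(~sch.wide+stype, dclus1, Ntotal=1)
+}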
+
+svycoxph<-function(formula,design,subset=NULL,...){
+ .svycheck(design)
+ UseMethod("svycoxph",design)
+}
+
+svycoxph.survey.design<-function(formula,design,subset=NULL,...){
+ subset<-substitute(subset)
+ subset<-eval(subset, model.frame(design),parent.frame())
+ if (!is.null(subset))
+ design<-design[subset,]
+
+ if(any(weights(design)<0)) stop("weights must be non-negative")
+
+ data<-model.frame(design)
+
+ g<-match.call()
+ g$formula<-eval.parent(g$formula)
+ g$design<-NULL
+ g$var<-NULL
+ if (is.null(g$weights))
+ g$weights<-quote(.survey.prob.weights)
+ else
+ g$weights<-bquote(.survey.prob.weights*.(g$weights))
+ g[[1]]<-quote(coxph)
+ g$data<-quote(data)
+ g$subset<-quote(.survey.prob.weights>0)
+ g$model <- TRUE
+
+ ##need to rescale weights for stability
+ data$.survey.prob.weights<-(1/design$prob)/mean(1/design$prob)
+ if (!all(all.vars(formula) %in% names(data)))
+ stop("all variables must be in design= argument")
+ g<-with(list(data=data), eval(g))
+ g$call<-match.call()
+ g$call[[1]]<-as.name(.Generic)
+ g$printcall<-sys.call(-1)
+ g$printcall[[1]]<-as.name(.Generic)
+ class(g)<-c("svycoxph", class(g))
+ g$survey.design<-design
+
+ nas<-g$na.action
+ if (length(nas))
+ design<-design[-nas,]
+
+ dbeta.subset<-resid(g,"dfbeta",weighted=TRUE)
+ if (nrow(design)==NROW(dbeta.subset)){
+ dbeta<-as.matrix(dbeta.subset)
+ } else {
+ dbeta<-matrix(0,ncol=NCOL(dbeta.subset),nrow=nrow(design))
+ dbeta[is.finite(design$prob),]<-dbeta.subset
+ }
+ g$inv.info<-g$var
+
+ if (inherits(design,"survey.design2"))
+ g$var<-svyrecvar(dbeta, design$cluster,
+ design$strata, design$fpc,
+ postStrata=design$postStrata)
+ else if (inherits(design, "twophase"))
+ g$var<-twophasevar(dbeta, design)
+ else if(inherits(design, "twophase2"))
+ g$var<-twophase2var(dbeta, design)
+ else if(inherits(design, "pps"))
+ g$var<-ppsvar(dbeta,design)
+ else
+ g$var<-svyCprod(dbeta, design$strata,
+ design$cluster[[1]], design$fpc,design$nPSU,
+ design$certainty,design$postStrata)
+
+ g$wald.test<-coef(g)%*%solve(g$var,coef(g))
+ g$ll<-g$loglik
+ g$loglik<-NULL
+ g$rscore<-NULL
+ g$score<-NA
+ g$degf.resid<-degf(design)-length(coef(g)[!is.na(coef(g))])+1
+
+ g
+}
+
+
+model.frame.svycoxph<-function(formula,...){
+ f<-formula$call
+ env <- environment(formula(formula))
+ if (is.null(env))
+ env <- parent.frame()
+ f[[1]]<-as.name("model.frame")
+ f$data<-quote(data)
+ f$design<-NULL
+ f$method<-f$control<-f$singular.ok<-f$model<-f$x<-f$y<-f$iter<-NULL
+ f$formula<-formula(formula)
+ if (is.null(f$weights))
+ f$weights<-quote(.survey.prob.weights)
+ else
+ f$weights<-bquote(.survey.prob.weights*.(f$weights))
+ design<-formula$survey.design
+ data<-model.frame(design)
+ data$.survey.prob.weights<-(1/design$prob)/sum(1/design$prob)
+ with(list(data=data), eval(f))
+}
+
+model.matrix.svycoxph<-function (object, data = NULL, contrast.arg = object$contrasts,
+ ...)
+{
+ if (!is.null(object[["x"]]))
+ object[["x"]]
+ else {
+ if (is.null(data))
+ data <- model.frame(object, ...)
+ else data <- model.frame(object, data = data, ...)
+ Terms <- object$terms
+ attr(Terms, "intercept") <- 1
+ strats <- attr(Terms, "specials")$strata
+ cluster <- attr(Terms, "specials")$cluster
+ dropx <- NULL
+ if (length(cluster)) {
+ tempc <- untangle.specials(Terms, "cluster", 1:10)
+ ord <- attr(Terms, "order")[tempc$terms]
+ if (any(ord > 1))
+ stop("Cluster can not be used in an interaction")
+ dropx <- tempc$terms
+ }
+ if (length(strats)) {
+ temp <- untangle.specials(Terms, "strata", 1)
+ dropx <- c(dropx, temp$terms)
+ }
+ if (length(dropx)) {
+ newTerms <- Terms[-dropx]
+ X <- model.matrix(newTerms, data, contrasts = contrast.arg)
+ }
+ else {
+ newTerms <- Terms
+ X <- model.matrix(Terms, data, contrasts = contrast.arg)
+ }
+ X
+ }
+}
+
+print.svycoxph<-function(x,...){
+ print(x$survey.design, varnames=FALSE, design.summaries=FALSE,...)
+## x$call<-x$printcall
+ NextMethod()
+}
+
+summary.svycoxph<-function(object,...){
+ print(object$survey.design,varnames=FALSE, design.summaries=FALSE,...)
+## object$call<-object$printcall
+ NextMethod()
+}
+
+survfit.svycoxph<-function(object,...){
+ stop("No survfit method for survey models")
+}
+extractAIC.svycoxph<-function(fit,...){
+ stop("No AIC for survey models")
+}
+
+anova.svycoxph<-function(object,...){
+ stop("No anova method for survey models")
+}
+
+svyglm<-function(formula, design, ...){
+ .svycheck(design)
+ UseMethod("svyglm",design)
+}
+
+svyglm.survey.design<-function(formula,design,subset=NULL,...){
+
+ subset<-substitute(subset)
+ subset<-eval(subset, model.frame(design), parent.frame())
+ if (!is.null(subset))
+ design<-design[subset,]
+
+ data<-model.frame(design)
+
+ g<-match.call()
+ g$formula<-eval.parent(g$formula)
+ g$design<-NULL
+ g$var<-NULL
+ if (is.null(g$weights))
+ g$weights<-quote(.survey.prob.weights)
+ else
+ g$weights<-bquote(.survey.prob.weights*.(g$weights))
+ g$data<-quote(data)
+ g[[1]]<-quote(glm)
+
+ ##need to rescale weights for stability in binomial
+ data$.survey.prob.weights<-(1/design$prob)/mean(1/design$prob)
+ if (!all(all.vars(formula) %in% names(data)))
+ stop("all variables must be in design= argument")
+ g<-with(list(data=data), eval(g))
+ g$naive.cov<-summary(g)$cov.unscaled
+
+ nas<-g$na.action
+ if (length(nas))
+ design<-design[-nas,]
+
+ g$cov.unscaled<-svy.varcoef(g,design)
+ g$df.residual <- degf(design)+1-length(coef(g)[!is.na(coef(g))])
+
+ class(g)<-c("svyglm",class(g))
+ g$call<-sys.call()
+ g$call[[1]]<-as.name(.Generic)
+ if(!("formula" %in% names(g$call))) {
+ if (is.null(names(g$call)))
+ i<-1
+ else
+ i<-min(which(names(g$call)[-1]==""))
+ names(g$call)[i+1]<-"formula"
+ }
+ g$survey.design<-design
+ g
+}
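+
+## Usage sketch (illustrative, guarded by if(FALSE)): a design-based GLM;
+## standard errors come from svy.varcoef() below. Assumes the 'api' data
+## shipped with this package.
+if (FALSE) {
+ data(api)
+ dclus2 <- svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
+ m <- svyglm(api00~ell+meals+mobility, design=dclus2)
+ summary(m)
+}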
+
+print.svyglm<-function(x,...){
+ print(x$survey.design, varnames=FALSE, design.summaries=FALSE,...)
+ NextMethod()
+
+}
+
+coef.svyglm<-function(object,...,na.rm=TRUE) {
+ beta<-object$coefficients
+ if (!na.rm || length(beta)==object$rank)
+ beta
+ else
+ beta[object$qr$pivot[1:object$rank]]
+}
+
+vcov.svyglm<-function(object,...) {
+ v<-object$cov.unscaled
+ dimnames(v)<-list(names(coef(object)),names(coef(object)))
+ v
+}
+
+
+svy.varcoef<-function(glm.object,design){
+ Ainv<-summary(glm.object)$cov.unscaled
+ estfun<-model.matrix(glm.object)*resid(glm.object,"working")*glm.object$weights
+ if (glm.object$rank<NCOL(estfun)){
+ estfun<-estfun[,glm.object$qr$pivot[1:glm.object$rank]]
+ }
+ naa<-glm.object$na.action
+ ## the design may still have rows with weight zero for missing values
+ ## if there are weights or calibration. model.matrix will have removed them
+ if (length(naa) && (NROW(estfun)!=nrow(design) )){
+ if ((length(naa)+NROW(estfun))!=nrow(design) )
+ stop("length mismatch: this can't happen.")
+ n<-nrow(design)
+ inx <- (1:n)[-naa]
+ ee <- matrix(0,nrow=n,ncol=NCOL(estfun))
+ ee[inx,]<-estfun
+ estfun<-ee
+ }
+
+ if (inherits(design,"survey.design2"))
+ svyrecvar(estfun%*%Ainv,design$cluster,design$strata,design$fpc,postStrata=design$postStrata)
+ else if (inherits(design, "twophase"))
+ twophasevar(estfun%*%Ainv, design)
+ else if (inherits(design, "twophase2"))
+ twophase2var(estfun%*%Ainv, design)
+ else if (inherits(design, "pps"))
+ ppsvar(estfun%*%Ainv, design)
+ else
+ svyCprod(estfun%*%Ainv,design$strata,design$cluster[[1]],design$fpc, design$nPSU,
+ design$certainty,design$postStrata)
+ }
+
+residuals.svyglm<-function(object,type = c("deviance", "pearson", "working",
+ "response", "partial"),...){
+ type<-match.arg(type)
+ if (type=="pearson"){
+ y <- object$y
+ mu <- object$fitted.values
+ wts <- object$prior.weights
+ pwts<- 1/object$survey.design$prob
+ pwts<- pwts/mean(pwts)
+ ## missing values in calibrated/post-stratified designs
+ ## the rows will still be in the design object but not in the model
+ if (length(naa<-object$na.action) && (length(pwts)!=length(wts))){
+ if(length(naa)+length(wts) != length(pwts))
+ stop("length mismatch: this can't happen.")
+ inx<-(1:length(pwts))[-naa]
+ } else inx<-1:length(pwts)
+ r<-numeric(length(pwts))
+ r[inx]<-(y - mu) * sqrt(wts/pwts[inx])/(sqrt(object$family$variance(mu)))
+ if (is.null(object$na.action))
+ r
+ else
+ naresid(object$na.action, r)
+ } else
+ NextMethod()
+
+}
+
+summary.svyglm<-function (object, correlation = FALSE, df.resid=NULL,...)
+{
+ Qr <- object$qr
+ est.disp <- TRUE
+ if (is.null(df.resid))
+ df.r <- object$df.residual
+ else
+ df.r<-df.resid
+
+ dispersion<-svyvar(resid(object,"pearson"), object$survey.design,
+ na.rm=TRUE)
+
+ coef.p <- coef(object)
+ covmat<-vcov(object)
+ dimnames(covmat) <- list(names(coef.p), names(coef.p))
+ var.cf <- diag(covmat)
+ s.err <- sqrt(var.cf)
+ tvalue <- coef.p/s.err
+ dn <- c("Estimate", "Std. Error")
+ if (!est.disp) {
+ pvalue <- 2 * pnorm(-abs(tvalue))
+ coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
+ dimnames(coef.table) <- list(names(coef.p), c(dn, "z value",
+ "Pr(>|z|)"))
+ }
+ else if (df.r > 0) {
+ pvalue <- 2 * pt(-abs(tvalue), df.r)
+ coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
+ dimnames(coef.table) <- list(names(coef.p), c(dn, "t value",
+ "Pr(>|t|)"))
+ }
+ else {
+ coef.table <- cbind(coef.p, Inf)
+ dimnames(coef.table) <- list(names(coef.p), dn)
+ }
+ ans <- c(object[c("call", "terms", "family", "deviance",
+ "aic", "contrasts", "df.residual", "null.deviance", "df.null",
+ "iter")], list(deviance.resid = residuals(object, type = "deviance"),
+ aic = object$aic, coefficients = coef.table, dispersion = dispersion,
+ df = c(object$rank, df.r,NCOL(Qr$qr)), cov.unscaled = covmat,
+ cov.scaled = covmat))
+ if (correlation) {
+ dd <- sqrt(diag(covmat))
+ ans$correlation <- covmat/outer(dd, dd)
+ }
+ ans$aliased<-is.na(coef(object,na.rm=FALSE))
+ ans$survey.design<-list(call=object$survey.design$call)
+ class(ans) <- c("summary.svyglm","summary.glm")
+ return(ans)
+}
+
+
+logLik.svyglm<-function(object,...){
+ warning("svyglm not fitted by maximum likelihood.")
+ object$deviance
+}
+
+AIC.svyglm<-function(object,...,k=2){
+ if (length(list(...))){
+ do.call(rbind,lapply(list(object,...),extractAIC,k=k))
+ } else {
+ extractAIC(object,k=k)
+ }
+}
+extractAIC.svyglm<-function(fit,scale,k=2,...){
+ if (length(attr(terms(fit),"factors"))){
+ r<-regTermTest(fit, delete.response(formula(fit)), method="LRT")
+ deltabar<-mean(r$lambda)
+ } else {
+ r<-list(lambda=0)
+ deltabar<-NaN
+ }
+ d<-fit$deviance
+ c(eff.p=sum(r$lambda), AIC=d+k*sum(r$lambda),deltabar=deltabar)
+}
+
+extractAIC.svrepglm<-extractAIC.svyglm
+
+BIC.svyglm<-function(object,...,maximal){
+ if (length(list(...))){
+ do.call(rbind,lapply(list(object,...),dBIC,modelM=maximal))
+ } else {
+ dBIC(object,modelM=maximal)
+ }
+
+ }
+
+dBIC<-function(modela,modelM){
+ pm<-modela$rank
+ pM<-modelM$rank
+
+ if (any(!(names(coef(modela))%in% names(coef(modelM))))){
+ stop("coefficients in model but not in maximal model")
+ }
+ index<-!(names(coef(modelM))%in% names(coef(modela)))
+ n<-1+modela$df.null
+ if(any(index)){
+ wald<-coef(modelM)[index]%*%solve(vcov(modelM)[index,index],coef(modelM)[index])
+ detDelta<-det(solve(modelM$naive.cov[index,index,drop=FALSE],modelM$cov.unscaled[index,index,drop=FALSE]))
+ dbar<-detDelta^(1/(pM-pm))
+ nstar<-n/dbar
+ }else {
+ wald<-0
+ detDelta<-1
+ dbar<-1
+ nstar=NaN
+ }
+ c(p=pm, BIC=wald+pm*log(n)+log(detDelta)+deviance(modelM),neff=nstar)
+ }
+
+
+confint.svyglm<-function(object,parm,level=0.95,method=c("Wald","likelihood"),ddf=Inf,...){
+ method<-match.arg(method)
+ if(method=="Wald"){
+ tlevel <- 1 - 2*pnorm(qt((1 - level)/2, df = ddf))
+ return(confint.default(object,parm=parm,level=tlevel,...))
+ }
+ pnames <- names(coef(object))
+ if (missing(parm))
+ parm <- seq_along(pnames)
+ else if (is.character(parm))
+ parm <- match(parm, pnames, nomatch = 0)
+ lambda<-diag(object$cov.unscaled[parm,parm,drop=FALSE])/diag(object$naive.cov[parm,parm,drop=FALSE])
+ if(is.null(ddf)) ddf<-object$df.residual
+ if (ddf==Inf)
+ alpha<-pnorm(qnorm((1-level)/2)*sqrt(lambda))/2
+ else {
+ alpha<-pnorm(qt((1-level)/2,df=ddf)*sqrt(lambda))/2
+ }
+ rval<-vector("list",length(parm))
+ for(i in 1:length(parm)){
+ temp<-MASSprofile_glm(fitted=object,which=parm[i],alpha=alpha[i],...)
+ rval[[i]]<-confint_profile(temp,parm=parm[i],level=level, unscaled_level=2*alpha[i],...)
+ }
+
+ names(rval)<-pnames[parm]
+ if (length(rval)==1)
+ rval<-rval[[1]]
+ else
+ rval<-do.call(rbind,rval)
+ attr(rval,"levels")<-level
+ rval
+}
+
+
+##
+## based on MASS:::confint.profile.glm
+## which is GPL and (c) Bill Venables and Brian D Ripley.
+##
+confint_profile <- function (object, parm = seq_along(pnames), level = 0.95, unscaled_level, ...)
+{
+ of <- attr(object, "original.fit")
+ pnames <- names(coef(of))
+ if (is.character(parm))
+ parm <- match(parm, pnames, nomatch = 0L)
+ a <- (1-level)/2
+ a <- c(a, 1 - a)
+ pct <- paste(round(100 * a, 1), "%")
+ ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(pnames[parm],
+ pct))
+ cutoff <- c(qnorm(unscaled_level),qnorm(unscaled_level, lower.tail=FALSE))
+ for (pm in parm) {
+ pro <- object[[pnames[pm]]]
+ if (is.null(pro))
+ next
+ if (length(pnames) > 1L)
+ sp <- spline(x = pro[, "par.vals"][, pm], y = pro[,
+ 1])
+ else sp <- spline(x = pro[, "par.vals"], y = pro[, 1])
+ ci[pnames[pm], ] <- approx(sp$y, sp$x, xout = cutoff)$y
+ }
+ drop(ci)
+}
+
+##
+## MASS:::profile.glm with very slight changes to avoid rounding error in 1-alpha
+## original is GPL and (c) Bill Venables and Brian D Ripley.
+##
+
+MASSprofile_glm<-function (fitted, which = 1:p, alpha = 0.01, maxsteps = 10, del = zmax/5,
+ trace = FALSE, ...)
+{
+ Pnames <- names(B0 <- coef(fitted))
+ nonA <- !is.na(B0)
+ pv0 <- t(as.matrix(B0))
+ p <- length(Pnames)
+ if (is.character(which))
+ which <- match(which, Pnames)
+ summ <- summary(fitted)
+ std.err <- summ$coefficients[, "Std. Error", drop = FALSE]
+ mf <- model.frame(fitted)
+ Y <- model.response(mf)
+ n <- NROW(Y)
+ O <- model.offset(mf)
+ if (!length(O))
+ O <- rep(0, n)
+ W <- model.weights(mf)
+ if (length(W) == 0L)
+ W <- rep(1, n)
+ OriginalDeviance <- deviance(fitted)
+ DispersionParameter <- summ$dispersion
+ X <- model.matrix(fitted)
+ fam <- family(fitted)
+ switch(fam$family, binomial = , poisson = , `Negative Binomial` = {
+ zmax <- sqrt(qchisq(alpha, 1,lower.tail=FALSE))
+ profName <- "z"
+ }, gaussian = , quasi = , inverse.gaussian = , quasibinomial = ,
+ quasipoisson = , {
+ zmax <- sqrt(qf(alpha, 1, n - p,lower.tail=FALSE))
+ profName <- "tau"
+ })
+ prof <- vector("list", length = length(which))
+ names(prof) <- Pnames[which]
+ for (i in which) {
+ if (!nonA[i])
+ next
+ zi <- 0
+ pvi <- pv0
+ a <- nonA
+ a[i] <- FALSE
+ Xi <- X[, a, drop = FALSE]
+ pi <- Pnames[i]
+ for (sgn in c(-1, 1)) {
+ if (trace)
+ message("\nParameter: ", pi, " ", c("down", "up")[(sgn +
+ 1)/2 + 1])
+ step <- 0
+ z <- 0
+ LP <- X[, nonA, drop = FALSE] %*% B0[nonA] + O
+ while ((step <- step + 1) < maxsteps && abs(z) <
+ zmax) {
+ bi <- B0[i] + sgn * step * del * std.err[Pnames[i],
+ 1]
+ o <- O + X[, i] * bi
+ fm <- glm.fit(x = Xi, y = Y, weights = W, etastart = LP,
+ offset = o, family = fam, control = fitted$control)
+ LP <- Xi %*% fm$coefficients + o
+ ri <- pv0
+ ri[, names(coef(fm))] <- coef(fm)
+ ri[, pi] <- bi
+ pvi <- rbind(pvi, ri)
+ zz <- (fm$deviance - OriginalDeviance)/DispersionParameter
+ if (zz > -0.001)
+ zz <- max(zz, 0)
+ else stop("profiling has found a better solution, so original fit had not converged")
+ z <- sgn * sqrt(zz)
+ zi <- c(zi, z)
+ }
+ }
+ si <- order(zi)
+ prof[[pi]] <- structure(data.frame(zi[si]), names = profName)
+ prof[[pi]]$par.vals <- pvi[si, , drop = FALSE]
+ }
+ val <- structure(prof, original.fit = fitted, summary = summ)
+ class(val) <- c("profile.glm", "profile")
+ val
+}
+
+
+###
+
+svymle<-function(loglike, gradient=NULL, design, formulas,
+ start=NULL, control=list(maxit=1000),
+ na.action="na.fail", method=NULL,...){
+ if(is.null(method))
+ method<-if(is.null(gradient)) "Nelder-Mead" else "nlm"
+
+ if (!inherits(design,"survey.design"))
+ stop("design is not a survey.design")
+ weights<-weights(design)
+ wtotal<-sum(weights)
+
+ if (is.null(control$fnscale))
+ control$fnscale <- -wtotal/length(weights)
+ if (inherits(design, "twophase"))
+ data<-design$phase1$sample$variables
+ else
+ data<-design$variables
+
+## Get the response variable
+ nms<-names(formulas)
+ if (nms[1]==""){
+ if (inherits(formulas[[1]],"formula"))
+ y<-eval.parent(model.frame(formulas[[1]],data=data,na.action=na.pass))
+ else
+            y<-eval(formulas[[1]],data,parent.frame())
+ formulas[1]<-NULL
+ if (FALSE && NCOL(y)>1) stop("Y has more than one column")
+ } else {
+ ## one formula must have response
+ has.response<-sapply(formulas,length)==3
+ if (sum(has.response)!=1) stop("Need a response variable")
+ ff<-formulas[[which(has.response)]]
+ ff[[3]]<-1
+ y<-eval.parent(model.frame(ff,data=data,na.action=na.pass))
+ formulas[[which(has.response)]]<-delete.response(terms(formulas[[which(has.response)]]))
+ nms<-c("",nms)
+ }
+
+ if(length(which(nms==""))>1) stop("Formulas must have names")
+
+ mf<-vector("list",length(formulas))
+ vnms <- unique(do.call(c, lapply(formulas, all.vars)))
+ uformula <- make.formula(vnms)
+ mf <- model.frame(uformula, data=data,na.action=na.pass)
+ mf <- cbind(`(Response)`=y, mf)
+ mf<-mf[,!duplicated(colnames(mf)),drop=FALSE]
+
+ mf<-get(na.action)(mf)
+ nas<-attr(mf,"na.action")
+ if (length(nas))
+ design<-design[-nas,]
+ weights<-1/design$prob
+ wtotal<-sum(weights)
+
+ Y<-mf[,1]
+ mm<-lapply(formulas,model.matrix, data=mf)
+
+ ## parameter names
+ parnms<-lapply(mm,colnames)
+ for(i in 1:length(parnms))
+ parnms[[i]]<-paste(nms[i+1],parnms[[i]],sep=".")
+ parnms<-unlist(parnms)
+
+ # maps position in theta to model matrices
+ np<-c(0,cumsum(sapply(mm,NCOL)))
+
+
+ objectivefn<-function(theta,...){
+ args<-vector("list",length(nms))
+ args[[1]]<-Y
+ for(i in 2:length(nms))
+ args[[i]]<-mm[[i-1]]%*%theta[(np[i-1]+1):np[i]]
+ names(args)<-nms
+ args<-c(args, ...)
+ sum(do.call("loglike",args)*weights)
+ }
+
+ if (is.null(gradient)) {
+ grad<-NULL
+ } else {
+ fnargs<-names(formals(loglike))[-1]
+ grargs<-names(formals(gradient))[-1]
+ if(!identical(fnargs,grargs))
+ stop("loglike and gradient have different arguments.")
+ reorder<-na.omit(match(grargs,nms[-1]))
+ grad<-function(theta,...){
+ args<-vector("list",length(nms))
+ args[[1]]<-Y
+ for(i in 2:length(nms))
+ args[[i]]<-drop(mm[[i-1]]%*%theta[(np[i-1]+1):np[i]])
+ names(args)<-nms
+ args<-c(args,...)
+ rval<-NULL
+ tmp<-do.call("gradient",args)
+ for(i in reorder){
+ rval<-c(rval, colSums(as.matrix(tmp[,i]*weights*mm[[i]])))
+ }
+ drop(rval)
+ }
+ }
+
+ theta0<-numeric(np[length(np)])
+ if (is.list(start))
+ st<-do.call("c",start)
+ else
+ st<-start
+
+ if (length(st)==length(theta0)) {
+ theta0<-st
+ } else {
+ stop("starting values wrong length")
+ }
+
+ if (method=="nlm"){
+ ff<-function(theta){
+ rval<- -objectivefn(theta)
+ if (is.na(rval)) rval<- -Inf
+ attr(rval,"gradient")<- -grad(theta)
+ rval
+ }
+ rval<-nlm(ff, theta0,hessian=TRUE)
+ if (rval$code>3) warning("nlm did not converge")
+ rval$par<-rval$estimate
+ } else {
+ rval<-optim(theta0, objectivefn, grad,control=control,
+ hessian=TRUE,method=method,...)
+ if (rval$conv!=0) warning("optim did not converge")
+ }
+
+
+
+
+ names(rval$par)<-parnms
+ dimnames(rval$hessian)<-list(parnms,parnms)
+
+ if (is.null(gradient)) {
+ rval$invinf<-solve(-rval$hessian)
+ rval$scores<-NULL
+ rval$sandwich<-NULL
+ } else {
+ theta<-rval$par
+ args<-vector("list",length(nms))
+ args[[1]]<-Y
+ for(i in 2:length(nms))
+ args[[i]]<-drop(mm[[i-1]]%*%theta[(np[i-1]+1):np[i]])
+ names(args)<-nms
+ args<-c(args,...)
+ deta<-do.call("gradient",args)
+ rval$scores<-NULL
+ for(i in reorder)
+ rval$scores<-cbind(rval$scores,deta[,i]*weights*mm[[i]])
+
+ rval$invinf<-solve(-rval$hessian)
+ dimnames(rval$invinf)<-list(parnms,parnms)
+
+ db<-rval$scores%*%rval$invinf
+ if (inherits(design,"survey.design2"))
+ rval$sandwich<-svyrecvar(db,design$cluster,design$strata, design$fpc,
+ postStrata=design$postStrata)
+ else if (inherits(design, "twophase"))
+ rval$sandwich<-twophasevar(db,design)
+ else
+ rval$sandwich<-svyCprod(db,design$strata,design$cluster[[1]],
+ design$fpc, design$nPSU,
+ design$certainty, design$postStrata)
+ dimnames(rval$sandwich)<-list(parnms,parnms)
+ }
+ rval$call<-match.call()
+ rval$design<-design
+ class(rval)<-"svymle"
+ rval
+
+}
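+
+## Minimal usage sketch, assuming a Gaussian working model; the variables
+## y, x and the design `dstrat` are hypothetical. The first argument of
+## `loglike` is the response; the remaining arguments match the names of
+## `formulas`, and exactly one formula carries the response.
+## gauss.ll <- function(y, mean, lsigma) dnorm(y, mean, exp(lsigma), log=TRUE)
+## fit <- svymle(loglike=gauss.ll, design=dstrat,
+##               formulas=list(mean=y~x, lsigma=~1),
+##               start=list(c(0,0), 0), method="BFGS")
+## summary(fit, stderr="model")   # robust SEs need a gradient function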
+
+
+svymleOLD<-function(loglike, gradient=NULL, design, formulas,
+ start=NULL, control=list(maxit=1000),
+ na.action="na.fail", method=NULL,...){
+ if(is.null(method))
+ method<-if(is.null(gradient)) "Nelder-Mead" else "nlm"
+
+ if (!inherits(design,"survey.design"))
+ stop("design is not a survey.design")
+
+ weights<-1/design$prob
+ wtotal<-sum(weights)
+ if (is.null(control$fnscale))
+ control$fnscale<- -wtotal
+ if (inherits(design, "twophase"))
+ data<-design$phase1$sample$variables
+ else
+ data<-design$variables
+
+## Get the response variable
+ nms<-names(formulas)
+ if (nms[1]==""){
+ if (inherits(formulas[[1]],"formula"))
+ y<-eval.parent(model.frame(formulas[[1]],data=data,na.action=na.pass))
+ else
+            y<-eval(formulas[[1]],data,parent.frame())
+ formulas[1]<-NULL
+ if (FALSE && NCOL(y)>1) stop("Y has more than one column")
+ } else {
+ ## one formula must have response
+ has.response<-sapply(formulas,length)==3
+ if (sum(has.response)!=1) stop("Need a response variable")
+ ff<-formulas[[which(has.response)]]
+ ff[[3]]<-1
+ y<-eval.parent(model.frame(ff,data=data,na.action=na.pass))
+ formulas[[which(has.response)]]<-delete.response(terms(formulas[[which(has.response)]]))
+ nms<-c("",nms)
+ }
+
+ if(length(which(nms==""))>1) stop("Formulas must have names")
+
+
+ mf<-vector("list",length(formulas))
+ for(i in 1:length(formulas)){
+ mf[[i]]<-eval.parent(model.frame(formulas[[i]], data=data, na.action=na.pass))
+ }
+ notnulls<-sapply(mf,function(mfi) NCOL(mfi)!=0)
+ mf<-mf[notnulls]
+ if (any(notnulls))
+ mf<-as.data.frame(do.call("cbind",c(y,mf)))
+ else
+ mf<-y
+ names(mf)[1]<-"(Response)"
+ mf<-mf[,!duplicated(colnames(mf)),drop=FALSE]
+
+ mf<-get(na.action)(mf)
+ nas<-attr(mf,"na.action")
+ if (length(nas))
+ design<-design[-nas,]
+
+ Y<-mf[,1]
+ mm<-lapply(formulas,model.matrix, data=mf)
+
+ ## parameter names
+ parnms<-lapply(mm,colnames)
+ for(i in 1:length(parnms))
+ parnms[[i]]<-paste(nms[i+1],parnms[[i]],sep=".")
+ parnms<-unlist(parnms)
+
+ # maps position in theta to model matrices
+ np<-c(0,cumsum(sapply(mm,NCOL)))
+
+
+ objectivefn<-function(theta,...){
+ args<-vector("list",length(nms))
+ args[[1]]<-Y
+ for(i in 2:length(nms))
+ args[[i]]<-mm[[i-1]]%*%theta[(np[i-1]+1):np[i]]
+ names(args)<-nms
+ args<-c(args, ...)
+ sum(do.call("loglike",args)*weights)
+ }
+
+ if (is.null(gradient)) {
+ grad<-NULL
+ } else {
+ fnargs<-names(formals(loglike))[-1]
+ grargs<-names(formals(gradient))[-1]
+ if(!identical(fnargs,grargs))
+ stop("loglike and gradient have different arguments.")
+ reorder<-na.omit(match(grargs,nms[-1]))
+ grad<-function(theta,...){
+ args<-vector("list",length(nms))
+ args[[1]]<-Y
+ for(i in 2:length(nms))
+ args[[i]]<-drop(mm[[i-1]]%*%theta[(np[i-1]+1):np[i]])
+ names(args)<-nms
+ args<-c(args,...)
+ rval<-NULL
+ tmp<-do.call("gradient",args)
+ for(i in reorder){
+ rval<-c(rval, colSums(as.matrix(tmp[,i]*weights*mm[[i]])))
+ }
+ drop(rval)
+ }
+ }
+
+ theta0<-numeric(np[length(np)])
+ if (is.list(start))
+ st<-do.call("c",start)
+ else
+ st<-start
+
+ if (length(st)==length(theta0)) {
+ theta0<-st
+ } else {
+ stop("starting values wrong length")
+ }
+
+ if (method=="nlm"){
+ ff<-function(theta){
+ rval<- -objectivefn(theta)
+ if (is.na(rval)) rval<- -Inf
+ attr(rval,"grad")<- -grad(theta)
+ rval
+ }
+ rval<-nlm(ff, theta0,hessian=TRUE)
+ if (rval$code>3) warning("nlm did not converge")
+ rval$par<-rval$estimate
+ } else {
+ rval<-optim(theta0, objectivefn, grad,control=control,
+ hessian=TRUE,method=method,...)
+ if (rval$conv!=0) warning("optim did not converge")
+ }
+
+
+
+
+ names(rval$par)<-parnms
+ dimnames(rval$hessian)<-list(parnms,parnms)
+
+ if (is.null(gradient)) {
+ rval$invinf<-solve(-rval$hessian)
+ rval$scores<-NULL
+ rval$sandwich<-NULL
+ } else {
+ theta<-rval$par
+ args<-vector("list",length(nms))
+ args[[1]]<-Y
+ for(i in 2:length(nms))
+ args[[i]]<-drop(mm[[i-1]]%*%theta[(np[i-1]+1):np[i]])
+ names(args)<-nms
+ args<-c(args,...)
+ deta<-do.call("gradient",args)
+ rval$scores<-NULL
+ for(i in reorder)
+ rval$scores<-cbind(rval$scores,deta[,i]*weights*mm[[i]])
+
+ rval$invinf<-solve(-rval$hessian)
+ dimnames(rval$invinf)<-list(parnms,parnms)
+
+ db<-rval$scores%*%rval$invinf
+ if (inherits(design,"survey.design2"))
+ rval$sandwich<-svyrecvar(db,design$cluster,design$strata, design$fpc,
+ postStrata=design$postStrata)
+ else if (inherits(design, "twophase"))
+ rval$sandwich<-twophasevar(db,design)
+ else
+ rval$sandwich<-svyCprod(db,design$strata,design$cluster[[1]],
+ design$fpc, design$nPSU,
+ design$certainty, design$postStrata)
+ dimnames(rval$sandwich)<-list(parnms,parnms)
+ }
+ rval$call<-match.call()
+ rval$design<-design
+ class(rval)<-"svymle"
+ rval
+
+}
+
+coef.svymle<-function(object,...) object$par
+
+vcov.svymle<-function(object,stderr=c("robust","model"),...) {
+ stderr<-match.arg(stderr)
+ if (stderr=="robust"){
+ rval<-object$sandwich
+ if (is.null(rval)) {
+ p<-length(coef(object))
+ rval<-matrix(NA,p,p)
+ }
+ } else {
+ rval<-object$invinf*mean(1/object$design$prob)
+ }
+ rval
+}
+
+
+print.svymle<-function(x,...){
+ cat("Survey-sampled mle: \n")
+ print(x$call)
+ cat("Coef: \n")
+ print(x$par)
+}
+
+summary.svymle<-function(object,stderr=c("robust","model"),...){
+ cat("Survey-sampled mle: \n")
+ print(object$call)
+ stderr<-match.arg(stderr)
+ tbl<-data.frame(Coef=coef(object),SE=sqrt(diag(vcov(object,stderr=stderr))))
+ tbl$p.value<-format.pval(2*(1-pnorm(abs(tbl$Coef/tbl$SE))), digits=3,eps=0.001)
+ print(tbl)
+ print(object$design)
+}
+
+model.frame.survey.design<-function(formula,...,drop=TRUE){
+ formula$variables
+}
+model.frame.svyrep.design<-function(formula,...){
+ formula$variables
+}
+model.frame.survey.design2<-function(formula,...){
+ formula$variables
+}
+
+.onLoad<-function(...){
+ if (is.null(getOption("survey.lonely.psu")))
+ options(survey.lonely.psu="fail")
+ if (is.null(getOption("survey.ultimate.cluster")))
+ options(survey.ultimate.cluster=FALSE)
+ if (is.null(getOption("survey.want.obsolete")))
+ options(survey.want.obsolete=FALSE)
+ if (is.null(getOption("survey.adjust.domain.lonely")))
+ options(survey.adjust.domain.lonely=FALSE)
+ if (is.null(getOption("survey.drop.replicates")))
+ options(survey.drop.replicates=TRUE)
+ if (is.null(getOption("survey.multicore")))
+ options(survey.multicore=FALSE)
+ if (is.null(getOption("survey.replicates.mse")))
+ options(survey.replicates.mse=FALSE)
+}
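+
+## These defaults can be overridden per session, e.g.
+## options(survey.lonely.psu="adjust")   # rather than failing on lonely PSUs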
+
+
+predterms<-function(object,se=FALSE,terms=NULL){
+ tt<-terms(object)
+ n <- length(object$residuals)
+ p <- object$rank
+ p1 <- seq_len(p)
+ piv <- object$qr$pivot[p1]
+ beta<-coef(object)
+ X<-mm<-model.matrix(object)
+ aa <- attr(mm, "assign")
+ ll <- attr(tt, "term.labels")
+ hasintercept <- attr(tt, "intercept") > 0L
+ if (hasintercept)
+ ll <- c("(Intercept)", ll)
+ aaa <- factor(aa, labels = ll)
+ asgn <- split(order(aa), aaa)
+ if (hasintercept) {
+ asgn$"(Intercept)" <- NULL
+ }
+ avx <- colMeans(mm)
+ termsconst <- sum(avx[piv] * beta[piv])
+ nterms <- length(asgn)
+ ip <- matrix(ncol = nterms, nrow = NROW(X))
+ if (nterms > 0) {
+ predictor <- matrix(ncol = nterms, nrow = NROW(X))
+ dimnames(predictor) <- list(rownames(X), names(asgn))
+
+ if (hasintercept)
+ X <- sweep(X, 2L, avx, check.margin = FALSE)
+ unpiv <- rep.int(0L, NCOL(X))
+ unpiv[piv] <- p1
+ for (i in seq.int(1L, nterms, length.out = nterms)) {
+ iipiv <- asgn[[i]]
+ ii <- unpiv[iipiv]
+ iipiv[ii == 0L] <- 0L
+ predictor[, i] <- if (any(iipiv > 0L))
+ X[, iipiv, drop = FALSE] %*% beta[iipiv]
+ else 0
+ if (se){
+ ip[,i]<-if (any(iipiv > 0L))
+ rowSums(as.matrix(X[, iipiv, drop = FALSE] %*% vcov(object)[ii,ii]) * X[, iipiv, drop = FALSE]) else 0
+ }
+ }
+ if (!is.null(terms)) {
+ predictor <- predictor[, terms, drop = FALSE]
+ if (se)
+ ip <- ip[, terms, drop = FALSE]
+ }
+ }
+ else {
+ predictor <- ip <- matrix(0, n, 0)
+ }
+ attr(predictor, "constant") <- if (hasintercept)
+ termsconst
+ else 0
+ if(se)
+ dimnames(ip)<-dimnames(predictor)
+ if (se) list(fit=predictor,se.fit=sqrt(ip)) else predictor
+}
+
+
+predict.svyglm <- function(object, newdata=NULL, total=NULL,
+ type = c("link", "response","terms"),
+ se.fit=(type!="terms"),
+ vcov=FALSE,...){
+ if(is.null(newdata))
+ newdata<-model.frame(object$survey.design)
+ type<-match.arg(type)
+ if (type=="terms")
+ return(predterms(object,se=se.fit,...))
+ tt<-delete.response(terms(formula(object)))
+ mf<-model.frame(tt,data=newdata, xlev=object$xlevels)
+ mm<-model.matrix(tt,mf,contrasts.arg = object$contrasts)
+ if (!is.null(total) && attr(tt,"intercept")){
+ mm[,attr(tt,"intercept")]<-mm[,attr(tt,"intercept")]*total
+ }
+ eta<-drop(mm %*% coef(object))
+ d<-drop(object$family$mu.eta(eta))
+ eta<-switch(type, link=eta, response=object$family$linkinv(eta))
+ if(se.fit){
+ if(vcov){
+ vv<-mm %*% vcov(object) %*% t(mm)
+ attr(eta,"var")<-switch(type,
+ link=vv,
+ response=d*(t(vv*d)))
+ } else {
+ ## FIXME make this more efficient
+ vv<-drop(rowSums((mm %*% vcov(object)) * mm))
+ attr(eta,"var")<-switch(type,
+ link=vv,
+ response=drop(d*(t(vv*d))))
+ }
+ }
+ attr(eta,"statistic")<-type
+ class(eta)<-"svystat"
+ eta
+ }
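+
+## Sketch (hypothetical objects): predictions at new covariate values carry
+## design-based variances as an attribute; `total` rescales the intercept
+## column so that the fitted values estimate population totals.
+## predict(m, newdata=apipop, type="response")
+## predict(m, newdata=newdf, total=N, type="link")   # N = population size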
+
diff --git a/R/surveyby.R b/R/surveyby.R
new file mode 100755
index 0000000..7df4e41
--- /dev/null
+++ b/R/surveyby.R
@@ -0,0 +1,259 @@
+##
+## tables of statistics.
+##
+svyby<-function(formula, by, design,...) UseMethod("svyby",design)
+
+svyby.default<-function(formula, by, design, FUN,..., deff=FALSE, keep.var=TRUE,
+ keep.names=TRUE, verbose=FALSE, vartype=c("se","ci","ci","cv","cvpct","var"),
+ drop.empty.groups=TRUE, covmat=FALSE, return.replicates=FALSE, na.rm.by=FALSE,
+ na.rm.all=FALSE,
+ multicore=getOption("survey.multicore")){
+
+ if (inherits(by, "formula"))
+ byfactors<-model.frame(by, model.frame(design), na.action=na.pass)
+ else
+ byfactors<-as.data.frame(by)
+
+ if(covmat || return.replicates){
+ if (!inherits(design,"svyrep.design"))
+ stop("covmat=TRUE not implemented for this design type")
+ }
+
+ if (multicore && !requireNamespace("parallel",quietly=TRUE))
+ multicore<-FALSE
+
+ ## some people insist on using vectors rather than formulas
+ ## so I suppose we should be nice to them
+ if (!inherits(formula, "formula")){
+      if (NROW(formula)!=NROW(byfactors))
+ stop("'formula' is the wrong length")
+ if (!(is.data.frame(formula) ||
+ is.matrix(formula) ||
+ is.vector(formula))){
+ stop("invalid type for 'formula'")
+ }
+ }
+
+ hasdeff<- is.character(deff) || deff
+
+ ## all combinations that actually occur in this design
+ byfactor<-do.call("interaction", byfactors)
+ dropped<- weights(design,"sampling")==0
+ if (na.rm.by) dropped<-dropped | apply(byfactors, 1, function(x) any(is.na(x)))
+ if (na.rm.all){
+ if (inherits(formula,"formula"))
+ allx<-model.frame(formula,model.frame(design),na.action=na.pass)
+ else
+ allx<-formula
+ dropped <- dropped | (!complete.cases(allx))
+ }
+ uniquelevels<-sort(unique(byfactor[!dropped]))
+ uniques <- match(uniquelevels, byfactor)
+
+
+ if(missing(vartype)) vartype<-"se"
+ vartype<-match.arg(vartype,several.ok=TRUE)
+ nvartype<-base::which(eval(formals(sys.function())$vartype) %in% vartype)
+ if(any(is.na(nvartype))) stop("invalid vartype")
+
+ if (keep.var){
+ unwrap <-function(x){
+ rval<-c(coef(x))
+ nvar<-length(rval)
+ rval<-c(rval,c(se=SE(x),
+ ci_l=confint(x)[,1],
+ ci_u=confint(x)[,2],
+ cv=cv(x,warn=FALSE),
+ `cv%`=cv(x,warn=FALSE)*100,
+ var=SE(x)^2)[rep((nvartype-1)*(nvar),each=nvar)+(1:nvar)])
+ if(!is.null(attr(x,"deff")))
+ rval<-c(rval,DEff=deff(x))
+ rval
+ }
+
+ ## In dire need of refactoring (or rewriting)
+ ## but it seems to work.
+ results<-(if (multicore) parallel::mclapply else lapply)(uniques,
+ function(i){
+ if(verbose && !multicore) print(as.character(byfactor[i]))
+ if (inherits(formula,"formula"))
+ data<-formula
+ else
+ data<-subset(formula, byfactor %in% byfactor[i])
+ if (covmat || return.replicates) {
+ FUN(data,
+ design[byfactor %in% byfactor[i],],
+ deff=deff,...,return.replicates=TRUE)
+ } else {
+ FUN(data,
+ design[byfactor %in% byfactor[i],],
+ deff=deff,...)
+ }
+ })
+ rval<-t(sapply(results, unwrap))
+ if (covmat || return.replicates) {
+ replicates<-do.call(cbind,lapply(results,"[[","replicates"))
+ colnames(replicates)<-rep(as.character(uniquelevels), each=NCOL(replicates)/length(uniquelevels))
+ covmat.mat<-svrVar(replicates,design$scale,design$rscales, mse=design$mse,coef=as.vector(sapply(results,coef)))
+ } else{
+ covmats<-lapply(results,vcov)
+ ncovmat<-sum(sapply(covmats,ncol))
+ covmat.mat<-matrix(0,ncol=ncovmat,nrow=ncovmat)
+ j<-0
+ for(i in 1:length(covmats)){
+ ni<-nrow(covmats[[i]])
+ covmat.mat[j+(1:ni),j+(1:ni)]<-covmats[[i]]
+ j<-j+ni
+ }
+ }
+ } else {
+ unwrap2 <- function(x){
+ if(!is.null(attr(x, "deff")))
+ c(statistic = unclass(x),
+ DEff = deff(x))
+ else c(statistic = unclass(x))
+ }
+ rval<-sapply(uniques,
+ function(i) {
+ if(verbose) print(as.character(byfactor[i]))
+ if (inherits(formula,"formula"))
+ data<-formula
+ else
+ data<-subset(formula, byfactor %in% byfactor[i])
+ unwrap2(FUN(data,
+ design[byfactor %in% byfactor[i],],
+ deff=deff,...))}
+ )
+ if (is.matrix(rval)) rval<-t(rval)
+ }
+
+ nr<-NCOL(rval)
+ nstats<-nr/(1+ keep.var*(length(vartype)+ ("ci" %in% vartype)) + hasdeff)
+
+
+ if (nr>1)
+ rval<-cbind(byfactors[uniques,,drop=FALSE], rval)
+ else
+ rval <-cbind(byfactors[uniques,,drop=FALSE], statistic=rval)
+
+ expand.index<-function(index,reps,x=FALSE){
+ ns<-max(index)
+ if (x){
+ i<-matrix(1:(ns*reps),ncol=reps)
+ rval<-t(i[index,])
+
+ } else{
+ i<-matrix(1:(ns*reps), ncol=reps, nrow=ns, byrow=TRUE)
+ rval<- i[index,]
+ }
+ as.vector(rval)
+ }
+
+ if(drop.empty.groups){
+ if (keep.names)
+ rownames(rval)<-paste(byfactor[uniques])
+ rval<-rval[order(byfactor[uniques]),]
+
+ i<-expand.index(order(byfactor[uniques]),nstats)
+ if (keep.var)
+ covmat.mat<-covmat.mat[i,i]
+
+ } else {
+ a<-do.call("expand.grid", lapply(byfactors,function(f) levels(as.factor(f))))
+ a<-cbind(a,matrix(NA, ncol=nr, nrow=nrow(a)))
+ names(a)<-names(rval)
+ a[match(byfactor[uniques], levels(byfactor)),]<-rval
+ rval<-a
+ if (keep.names)
+ rownames(rval)<-levels(byfactor)
+ if (keep.var){
+ tmp<-matrix(ncol=nrow(a)*nstats,nrow=nrow(a)*nstats)
+ i<-expand.index(match(byfactor[uniques], levels(byfactor)),nstats,TRUE)
+ tmp[i,i]<-covmat.mat
+ covmat.mat<-tmp
+ }
+ }
+
+ attr(rval,"svyby")<-list(margins=1:NCOL(byfactors),nstats=nstats,
+ vars=if(keep.var) length(vartype) else 0,
+ deffs=deff,
+ statistic=deparse(substitute(FUN)),
+ variables= names(rval)[-(1:NCOL(byfactors))][1:nstats],
+ vartype=vartype
+ )
+ if (!keep.names)
+ rownames(rval)<-1:NROW(rval)
+
+ if(covmat)
+ attr(rval,"var")<-covmat.mat
+ if (return.replicates)
+ attr(rval,"replicates")<-replicates
+ attr(rval,"call")<-sys.call()
+ class(rval)<-c("svyby","data.frame")
+ rval
+}
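+
+## Sketch of typical use (dclus1 as in the earlier hypothetical sketches):
+## svyby(~api99, ~stype, dclus1, svymean, vartype=c("se","ci"))
+## svyby(~api99, ~stype, dclus1, svytotal, deff=TRUE)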
+
+SE.svyby <-function(object,...){
+ aa<-attr(object,"svyby")
+ if (!aa$vars) stop("Object does not contain variances")
+ vartype<-attr(object,"svyby")$vartype
+ if (pos<-match("se",vartype,0))
+ object[,max(aa$margins)+aa$nstats*pos+(1:aa$nstats)]
+ else if (pos<-match("var",vartype,0))
+ sqrt(object[,max(aa$margins)+aa$nstats*pos+(1:aa$nstats)])
+ else if (pos<-match("cv",vartype,0))
+ object[,max(aa$margins)+aa$nstats*pos+(1:aa$nstats)]*coef(object)
+ else if (pos<-match("cvpct",vartype,0))
+ object[,max(aa$margins)+aa$nstats*pos+(1:aa$nstats)]*coef(object)/100
+ else stop("This can't happen")
+
+}
+
+coef.svyby<-function (object, ...)
+{
+ aa <- attr(object, "svyby")
+ rval <- object[, max(aa$margins) + (1:aa$nstats)]
+ if (is.null(dim(rval))){
+ names(rval) <- row.names(object)
+ } else {
+ rval<-as.vector(as.matrix(rval))
+ names(rval)<-outer(rownames(object),
+ gsub("statistics\\.","",aa$variables), paste, sep=":")
+ }
+ rval
+}
+
+deff.svyby<-function(object,...){
+ aa<-attr(object,"svyby")
+ if (!aa$deffs) stop("object does not have design effect information")
+ object[,max(aa$margins)+aa$nstats*(1+aa$vars)+(1:aa$nstats)]
+}
+
+vcov.svyby<-function(object,...){
+ rval<-attr(object,"var")
+ if(is.null(rval)){
+ warning("Only diagonal elements of vcov() available")
+ se<-SE(object)
+ if (is.data.frame(se)) se<-as.vector(as.matrix(se))
+ if(length(se)>1)
+ rval<-diag(se^2)
+ else
+ rval<-as.matrix(se^2)
+ }
+ nms<-names(coef(object))
+ dimnames(rval)<-list(nms,nms)
+ rval
+}
+
+confint.svyquantile<-function(object,parm=NULL,level=NULL,...){
+ if (!is.null(level)) stop("need to re-run svyquantile to specify level")
+ ci<-t(matrix(as.vector(object$CIs),nrow=2))
+ colnames(ci)<-dimnames(object$CIs)[[1]]
+ rownames(ci)<-outer(dimnames(object$CIs)[[2]],
+ dimnames(object$CIs)[[3]],paste,sep="_")
+ if (is.null(parm))
+ ci
+ else
+ ci[parm,,drop=FALSE]
+}
+
diff --git a/R/surveychisq.R b/R/surveychisq.R
new file mode 100755
index 0000000..ae2423a
--- /dev/null
+++ b/R/surveychisq.R
@@ -0,0 +1,444 @@
+##
+## Tests for contingency tables
+##
+
+
+svychisq<-function(formula, design,...) UseMethod("svychisq",design)
+
+
+svychisq.survey.design<-function(formula, design,
+ statistic=c("F","Chisq","Wald","adjWald","lincom","saddlepoint"),
+ na.rm=TRUE,...){
+ if (ncol(attr(terms(formula),"factors"))>2)
+ stop("Only 2-way tables at the moment")
+ statistic<-match.arg(statistic)
+
+ ##if(!is.null(design$postStrata))
+ ## warning("Post-stratification not implemented")
+
+ rows<-formula[[2]][[2]]
+ cols<-formula[[2]][[3]]
+ rowvar<-unique(design$variables[,as.character(rows)])
+ colvar<-unique(design$variables[,as.character(cols)])
+ returnNA<-FALSE
+ if ((any(is.na(rowvar),is.na(colvar)))){
+ rowvar<-na.omit(rowvar)
+ colvar<-na.omit(colvar)
+ returnNA<-!na.rm
+ }
+ nr<-length(rowvar)
+ nc<-length(colvar)
+
+ fsat<-eval(bquote(~interaction(factor(.(rows)),factor(.(cols)))-1))
+ mm<-model.matrix(fsat,model.frame(fsat, design$variables,na.action=na.pass))
+ N<-nrow(mm)
+ nu <- length(unique(design$cluster[,1]))-length(unique(design$strata[,1]))
+
+
+ pearson<- suppressWarnings(chisq.test(svytable(formula,design,Ntotal=N),
+ correct=FALSE))
+
+
+ mf1<-expand.grid(rows=1:nr,cols=1:nc)
+ X1<-model.matrix(~factor(rows)+factor(cols),mf1)
+ X12<-model.matrix(~factor(rows)*factor(cols),mf1)
+
+
+ if(statistic %in% c("Wald", "adjWald")){
+ frow<-eval(bquote(~factor(.(rows))-1))
+ fcol<-eval(bquote(~factor(.(cols))-1))
+ mr<-model.matrix(frow, model.frame(frow,design$variables, na.action=na.pass))
+ mc<-model.matrix(fcol, model.frame(fcol,design$variables, na.action=na.pass))
+ one<-rep(1,NROW(mc))
+ cells<-svytotal(~mm+mr+mc+one,design,na.rm=TRUE)
+
+ Jcb <- cbind(diag(nr*nc),
+ -outer(mf1$rows,1:nr,"==")*rep(cells[(nr*nc)+nr+1:nc]/cells[(nr*nc)+nr+nc+1],each=nr),
+ -outer(mf1$cols,1:nc,"==")*cells[(nr*nc)+1:nr]/cells[(nr*nc)+nr+nc+1],
+ as.vector(outer(cells[(nr*nc)+1:nr],cells[(nr*nc+nr)+1:nc])/cells[(nr*nc)+nr+nc+1]^2))
+
+ Y<-cells[1:(nc*nr)]-as.vector(outer(cells[(nr*nc)+1:nr],cells[(nr*nc+nr)+1:nc]))/cells[(nr*nc)+nr+nc+1]
+ V<-Jcb%*%attr(cells,"var")%*%t(Jcb)
+ use<-as.vector(matrix(1:(nr*nc),nrow=nr,ncol=nc)[-1,-1])
+ waldstat<-Y[use]%*%solve(V[use,use],Y[use])
+ if (statistic=="Wald"){
+ waldstat<-waldstat/((nc-1)*(nr-1))
+ numdf<-(nc-1)*(nr-1)
+ denomdf<-nu
+ } else {
+ numdf<-(nr-1)*(nc-1)
+ denomdf<-(nu-numdf+1)
+ waldstat <- waldstat*denomdf/(numdf*nu)
+ }
+ if (returnNA){
+ pearson$statistic<-NA
+ pearson$parameter<-c(ndf=numdf,ddf=denomdf)
+ pearson$p.value<-NA
+ attr(pearson$statistic,"names")<-"F"
+ pearson$data.name<-deparse(sys.call(-1))
+ pearson$method<-"Design-based Wald test of association"
+ } else {
+ pearson$statistic<-waldstat
+ pearson$parameter<-c(ndf=numdf,ddf=denomdf)
+ pearson$p.value<-pf(pearson$statistic, numdf, denomdf, lower.tail=FALSE)
+ attr(pearson$statistic,"names")<-"F"
+ pearson$data.name<-deparse(sys.call(-1))
+ pearson$method<-"Design-based Wald test of association"
+ }
+ return(pearson)
+ }
+
+ mean2<-svymean(mm,design,na.rm=TRUE)
+
+
+
+
+ Cmat<-qr.resid(qr(X1),X12[,-(1:(nr+nc-1)),drop=FALSE])
+ Dmat <- diag(mean2)
+ iDmat<- diag(ifelse(mean2==0,0,1/mean2))
+ Vsrs <- (Dmat - outer(mean2,mean2))/N
+ V <- attr(mean2,"var")
+ denom<- t(Cmat) %*% (iDmat/N) %*% Cmat
+ numr<-t(Cmat)%*% iDmat %*% V %*% iDmat %*% Cmat
+ Delta<-solve(denom,numr)
+ d0<- sum(diag(Delta))^2/(sum(diag(Delta%*%Delta)))
+
+ warn<-options(warn=-1) ## turn off the small-cell count warning.
+ pearson<- chisq.test(svytable(formula,design,Ntotal=N),
+ correct=FALSE)
+ options(warn)
+
+ if (match.arg(statistic)=="F"){
+ pearson$statistic<-pearson$statistic/sum(diag(Delta))
+ pearson$p.value<-pf(pearson$statistic, d0, d0*nu, lower.tail=FALSE)
+ attr(pearson$statistic,"names")<-"F"
+ pearson$parameter<-c(ndf=d0,ddf=d0*nu)
+ pearson$method<-"Pearson's X^2: Rao & Scott adjustment"
+ } else if (match.arg(statistic)=="lincom") {
+ pearson$p.value<-pFsum(pearson$statistic, rep(1,ncol(Delta)), eigen(Delta,only.values=TRUE)$values,
+ lower.tail=FALSE,method="integration",ddf=d0*nu)
+ pearson$parameter<-NULL
+ pearson$method<-"Pearson's X^2: asymptotic exact distribution"
+ } else if (match.arg(statistic)=="saddlepoint") {
+ pearson$p.value<-pFsum(pearson$statistic, rep(1,ncol(Delta)), eigen(Delta,only.values=TRUE)$values,
+ lower.tail=FALSE,method="saddlepoint",ddf=d0*nu)
+ pearson$parameter<-NULL
+ pearson$method<-"Pearson's X^2: saddlepoint approximation"
+ } else{
+ pearson$p.value<-pchisq(pearson$statistic/mean(diag(Delta)),
+ df=NCOL(Delta),lower.tail=FALSE)
+ pearson$parameter<-c(df=NCOL(Delta))
+ pearson$method<-"Pearson's X^2: Rao & Scott adjustment"
+ }
+
+ if (returnNA){
+ pearson$statistic<-NA
+ pearson$p.value<-NA
+ }
+
+ pearson$data.name<-deparse(sys.call(-1))
+
+ pearson
+
+}
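+
+## Sketch (hypothetical design as above): Rao-Scott tests for a 2-way table
+## svychisq(~sch.wide+stype, dclus1)                     # F statistic (default)
+## svychisq(~sch.wide+stype, dclus1, statistic="adjWald")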
+
+svychisq.twophase<-function(formula, design,
+ statistic=c("F","Chisq","Wald","adjWald","lincom","saddlepoint"),
+ na.rm=TRUE,...){
+ if (ncol(attr(terms(formula),"factors"))>2)
+ stop("Only 2-way tables at the moment")
+ statistic<-match.arg(statistic)
+
+ ##if(!is.null(design$postStrata))
+ ## warning("Post-stratification not implemented")
+
+ rows<-formula[[2]][[2]]
+ cols<-formula[[2]][[3]]
+ dat<-design$phase1$sample$variables
+ rowvar<-unique(dat[,as.character(rows)])
+ colvar<-unique(dat[,as.character(cols)])
+ returnNA<-FALSE
+ if ((any(is.na(rowvar),is.na(colvar)))){
+ rowvar<-na.omit(rowvar)
+ colvar<-na.omit(colvar)
+ returnNA<-!na.rm
+ }
+ nr<-length(rowvar)
+ nc<-length(colvar)
+
+ fsat<-eval(bquote(~interaction(factor(.(rows)),factor(.(cols)))-1))
+ mm<-model.matrix(fsat,model.frame(fsat, dat,na.action=na.pass))
+ N<-nrow(mm)
+ nu <- length(unique(design$phase2$cluster[,1]))-length(unique(design$phase2$strata[,1]))
+
+
+ pearson<- suppressWarnings(chisq.test(svytable(formula,design,Ntotal=N),
+ correct=FALSE))
+
+
+ mf1<-expand.grid(rows=1:nr,cols=1:nc)
+ X1<-model.matrix(~factor(rows)+factor(cols),mf1)
+ X12<-model.matrix(~factor(rows)*factor(cols),mf1)
+
+
+ if(statistic %in% c("Wald", "adjWald")){
+ frow<-eval(bquote(~factor(.(rows))-1))
+ fcol<-eval(bquote(~factor(.(cols))-1))
+ mr<-model.matrix(frow, model.frame(frow,dat, na.action=na.pass))
+ mc<-model.matrix(fcol, model.frame(fcol,dat, na.action=na.pass))
+ one<-rep(1,NROW(mc))
+ cells<-svytotal(~mm+mr+mc+one,design,na.rm=TRUE)
+
+ Jcb <- cbind(diag(nr*nc),
+ -outer(mf1$rows,1:nr,"==")*rep(cells[(nr*nc)+nr+1:nc]/cells[(nr*nc)+nr+nc+1],each=nr),
+ -outer(mf1$cols,1:nc,"==")*cells[(nr*nc)+1:nr]/cells[(nr*nc)+nr+nc+1],
+ as.vector(outer(cells[(nr*nc)+1:nr],cells[(nr*nc+nr)+1:nc])/cells[(nr*nc)+nr+nc+1]^2))
+
+ Y<-cells[1:(nc*nr)]-as.vector(outer(cells[(nr*nc)+1:nr],cells[(nr*nc+nr)+1:nc]))/cells[(nr*nc)+nr+nc+1]
+ V<-Jcb%*%attr(cells,"var")%*%t(Jcb)
+ use<-as.vector(matrix(1:(nr*nc),nrow=nr,ncol=nc)[-1,-1])
+ waldstat<-Y[use]%*%solve(V[use,use],Y[use])
+ if (statistic=="Wald"){
+ waldstat<-waldstat/((nc-1)*(nr-1))
+ numdf<-(nc-1)*(nr-1)
+ denomdf<-nu
+ } else {
+ numdf<-(nr-1)*(nc-1)
+ denomdf<-(nu-numdf+1)
+ waldstat <- waldstat*denomdf/(numdf*nu)
+ }
+ if (returnNA){
+ pearson$statistic<-NA
+ pearson$parameter<-c(ndf=numdf,ddf=denomdf)
+ pearson$p.value<-NA
+ attr(pearson$statistic,"names")<-"F"
+ pearson$data.name<-deparse(sys.call(-1))
+ pearson$method<-"Design-based Wald test of association"
+ } else {
+ pearson$statistic<-waldstat
+ pearson$parameter<-c(ndf=numdf,ddf=denomdf)
+ pearson$p.value<-pf(pearson$statistic, numdf, denomdf, lower.tail=FALSE)
+ attr(pearson$statistic,"names")<-"F"
+ pearson$data.name<-deparse(sys.call(-1))
+ pearson$method<-"Design-based Wald test of association"
+ }
+ return(pearson)
+ }
+
+ mean2<-svymean(mm,design,na.rm=TRUE)
+
+
+
+
+ Cmat<-qr.resid(qr(X1),X12[,-(1:(nr+nc-1)),drop=FALSE])
+ Dmat <- diag(mean2)
+ iDmat<- diag(ifelse(mean2==0,0,1/mean2))
+ Vsrs <- (Dmat - outer(mean2,mean2))/N
+ V <- attr(mean2,"var")
+ denom<- t(Cmat) %*% (iDmat/N) %*% Cmat
+ numr<-t(Cmat)%*% iDmat %*% V %*% iDmat %*% Cmat
+ Delta<-solve(denom,numr)
+ d0<- sum(diag(Delta))^2/(sum(diag(Delta%*%Delta)))
+
+ warn<-options(warn=-1) ## turn off the small-cell count warning.
+ pearson<- chisq.test(svytable(formula,design,Ntotal=N),
+ correct=FALSE)
+ options(warn)
+
+ if (match.arg(statistic)=="F"){
+ pearson$statistic<-pearson$statistic/sum(diag(Delta))
+ pearson$p.value<-pf(pearson$statistic, d0, d0*nu, lower.tail=FALSE)
+ attr(pearson$statistic,"names")<-"F"
+ pearson$parameter<-c(ndf=d0,ddf=d0*nu)
+ pearson$method<-"Pearson's X^2: Rao & Scott adjustment"
+ } else if (match.arg(statistic)=="lincom") {
+ pearson$p.value<-pFsum(pearson$statistic, rep(1,ncol(Delta)), eigen(Delta,only.values=TRUE)$values,
+ lower.tail=FALSE,method="integration",ddf=d0*nu)
+ pearson$parameter<-NULL
+ pearson$method<-"Pearson's X^2: asymptotic exact distribution"
+ } else if (match.arg(statistic)=="saddlepoint") {
+ pearson$p.value<-pFsum(pearson$statistic, rep(1,ncol(Delta)), eigen(Delta,only.values=TRUE)$values,
+ lower.tail=FALSE,method="saddlepoint",ddf=d0*nu)
+ pearson$parameter<-NULL
+ pearson$method<-"Pearson's X^2: saddlepoint approximation"
+ } else{
+ pearson$p.value<-pchisq(pearson$statistic/mean(diag(Delta)),
+ df=NCOL(Delta),lower.tail=FALSE)
+ pearson$parameter<-c(df=NCOL(Delta))
+ pearson$method<-"Pearson's X^2: Rao & Scott adjustment"
+ }
+
+ if (returnNA){
+ pearson$statistic<-NA
+ pearson$p.value<-NA
+ }
+
+ pearson$data.name<-deparse(sys.call(-1))
+
+ pearson
+
+}
+
+
+svychisq.svyrep.design<-function(formula, design,
+ statistic=c("F","Chisq","Wald","adjWald","lincom","saddlepoint"),
+ na.rm=TRUE,...){
+ if (ncol(attr(terms(formula),"factors"))>2)
+ stop("Only 2-way tables at the moment")
+ statistic<-match.arg(statistic)
+
+ rows<-formula[[2]][[2]]
+ cols<-formula[[2]][[3]]
+ rowvar<-unique(design$variables[,as.character(rows)])
+ colvar<-unique(design$variables[,as.character(cols)])
+ returnNA<-FALSE
+ if ((any(is.na(rowvar),is.na(colvar)))){
+ rowvar<-na.omit(rowvar)
+ colvar<-na.omit(colvar)
+ returnNA<-!na.rm
+ }
+ nr<-length(rowvar)
+ nc<-length(colvar)
+
+ fsat<-eval(bquote(~interaction(factor(.(rows)),factor(.(cols)))-1))
+ mm<-model.matrix(fsat,model.frame(fsat, design$variables,na.action=na.pass))
+ N<-nrow(mm)
+ nu <- degf(design)
+
+
+ pearson<- suppressWarnings(chisq.test(svytable(formula,design,Ntotal=N),
+ correct=FALSE))
+
+
+ mf1<-expand.grid(rows=1:nr,cols=1:nc)
+ X1<-model.matrix(~factor(rows)+factor(cols),mf1)
+ X12<-model.matrix(~factor(rows)*factor(cols),mf1)
+
+
+ if(statistic %in% c("Wald", "adjWald")){
+ frow<-eval(bquote(~factor(.(rows))-1))
+ fcol<-eval(bquote(~factor(.(cols))-1))
+ mr<-model.matrix(frow, model.frame(frow,design$variables, na.action=na.pass))
+ mc<-model.matrix(fcol, model.frame(fcol,design$variables, na.action=na.pass))
+ one<-rep(1,NROW(mc))
+ cells<-svytotal(~mm+mr+mc+one,design,na.rm=TRUE)
+
+ Jcb <- cbind(diag(nr*nc),
+ -outer(mf1$rows,1:nr,"==")*rep(cells[(nr*nc)+nr+1:nc]/cells[(nr*nc)+nr+nc+1],each=nr),
+ -outer(mf1$cols,1:nc,"==")*cells[(nr*nc)+1:nr]/cells[(nr*nc)+nr+nc+1],
+ as.vector(outer(cells[(nr*nc)+1:nr],cells[(nr*nc+nr)+1:nc])/cells[(nr*nc)+nr+nc+1]^2))
+
+ Y<-cells[1:(nc*nr)]-as.vector(outer(cells[(nr*nc)+1:nr],cells[(nr*nc+nr)+1:nc]))/cells[(nr*nc)+nr+nc+1]
+ V<-Jcb%*%attr(cells,"var")%*%t(Jcb)
+ use<-as.vector(matrix(1:(nr*nc),nrow=nr,ncol=nc)[-1,-1])
+ waldstat<-Y[use]%*%solve(V[use,use],Y[use])
+ if (statistic=="Wald"){
+ waldstat<-waldstat/((nc-1)*(nr-1))
+ numdf<-(nc-1)*(nr-1)
+ denomdf<-nu
+ } else {
+ numdf<-(nr-1)*(nc-1)
+ denomdf<-(nu-numdf+1)
+ waldstat <- waldstat*denomdf/(numdf*nu)
+ }
+ if (returnNA){
+ pearson$statistic<-NA
+ pearson$parameter<-c(ndf=numdf,ddf=denomdf)
+ pearson$p.value<-NA
+ attr(pearson$statistic,"names")<-"F"
+ pearson$data.name<-deparse(sys.call(-1))
+ pearson$method<-"Design-based Wald test of association"
+ } else {
+ pearson$statistic<-waldstat
+ pearson$parameter<-c(ndf=numdf,ddf=denomdf)
+ pearson$p.value<-pf(pearson$statistic, numdf, denomdf, lower.tail=FALSE)
+ attr(pearson$statistic,"names")<-"F"
+ pearson$data.name<-deparse(sys.call(-1))
+ pearson$method<-"Design-based Wald test of association"
+ }
+ return(pearson)
+ }
+
+ mean2<-svymean(mm,design,na.rm=TRUE)
+
+
+
+
+ Cmat<-qr.resid(qr(X1),X12[,-(1:(nr+nc-1)),drop=FALSE])
+ Dmat <- diag(mean2)
+ iDmat<- diag(ifelse(mean2==0,0,1/mean2))
+ Vsrs <- (Dmat - outer(mean2,mean2))/N
+ V <- attr(mean2,"var")
+ denom<- t(Cmat) %*% (iDmat/N) %*% Cmat
+ numr<-t(Cmat)%*% iDmat %*% V %*% iDmat %*% Cmat
+ Delta<-solve(denom,numr)
+ d0<- sum(diag(Delta))^2/(sum(diag(Delta%*%Delta)))
+
+ warn<-options(warn=-1) ## turn off the small-cell count warning.
+ pearson<- chisq.test(svytable(formula,design,Ntotal=N),
+ correct=FALSE)
+ options(warn)
+
+ if (match.arg(statistic)=="F"){
+ pearson$statistic<-pearson$statistic/sum(diag(Delta))
+ pearson$p.value<-pf(pearson$statistic, d0, d0*nu, lower.tail=FALSE)
+ attr(pearson$statistic,"names")<-"F"
+ pearson$parameter<-c(ndf=d0,ddf=d0*nu)
+ } else if (match.arg(statistic)=="lincom") {
+ pearson$p.value<-pchisqsum(pearson$statistic, rep(1,ncol(Delta)), eigen(Delta,only.values=TRUE)$values,
+ lower.tail=FALSE,method="integration")
+ pearson$parameter<-NULL
+ pearson$method<-"Pearson's X^2: asymptotic exact distribution"
+ }else if (match.arg(statistic)=="saddlepoint") {
+ pearson$p.value<-pchisqsum(pearson$statistic, rep(1,ncol(Delta)), eigen(Delta,only.values=TRUE)$values,
+ lower.tail=FALSE,method="saddlepoint")
+ pearson$parameter<-NULL
+ pearson$method<-"Pearson's X^2: saddlepoint approximation"
+ } else {
+ pearson$p.value<-pchisq(pearson$statistic/mean(diag(Delta)),
+ df=NCOL(Delta),lower.tail=FALSE)
+ pearson$parameter<-c(df=NCOL(Delta))
+ }
+
+ if (returnNA){
+ pearson$statistic<-NA
+ pearson$p.value<-NA
+ }
+
+ pearson$data.name<-deparse(sys.call(-1))
+ pearson$method<-"Pearson's X^2: Rao & Scott adjustment"
+ pearson
+
+}
+
+
+
+summary.svreptable<-function(object,...){
+ object
+}
+
+summary.svytable<-function(object, statistic=c("F","Chisq","Wald","adjWald","lincom","saddlepoint"),...){
+
+ statistic<-match.arg(statistic)
+ call<-attr(object, "call")
+ ff<-call$formula
+
+ if (is.null(environment(ff)))
+ env<-parent.frame()
+ else
+ env<-environment(ff)
+
+ ff<-delete.response(ff)
+
+ test<-eval(bquote(svychisq(.(ff), design=.(call$design),
+ statistic=.(statistic))), env)
+
+ rval<-list(table=object,statistic=test)
+ class(rval)<-"summary.svytable"
+ rval
+}
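+
+## Sketch (hypothetical design): test association in an estimated table
+## summary(svytable(~sch.wide+stype, dclus1), statistic="F")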
+
+print.summary.svytable<-function(x,digits=0,...){
+ print(round(x$table,digits))
+ print(x$statistic,...)
+}
diff --git a/R/surveygraph.R b/R/surveygraph.R
new file mode 100755
index 0000000..684916f
--- /dev/null
+++ b/R/surveygraph.R
@@ -0,0 +1,244 @@
+make.panel.svysmooth<-function(design,bandwidth=NULL){
+ function(x,y,span=NULL,col.smooth="red",col=par("col"),bg=NA,pch=par("pch"),cex=1,...){
+ if(!is.null(span))
+ bandwidth<-diff(range(x))*span/3
+ s<-svysmooth(y~x,design=design,bandwidth=bandwidth)
+ points(x,y,pch=pch,bg=bg,col=col)
+ lines(s[[1]],col=col.smooth,...)
+ }
+}
+
+
+svyplot<-function(formula, design,...) UseMethod("svyplot",design)
+svyplot.default<-function(formula,
+ design,
+ style=c("bubble","hex","grayhex","subsample","transparent"),
+ sample.size=500, subset=NULL,legend=1,inches=0.05,
+ amount=NULL,basecol="black",alpha=c(0,0.8), xbins=30,...){
+
+ style<-match.arg(style)
+ if (style %in% c("hex","grayhex") && !requireNamespace("hexbin",quietly=TRUE)){
+ stop(style," plots require the hexbin package")
+ }
+
+ subset<-substitute(subset)
+ subset<-with(design$variables, subset)
+ if(length(subset)>0)
+ design<-design[subset,]
+
+ W<-weights(design, "sampling")
+
+ mf<-model.frame(formula, design$variables,na.action=na.pass)
+ Y<-model.response(mf)
+ X<-mf[,attr(attr(mf,"terms"),"term.labels")]
+
+ switch(style,
+ bubble={
+ if(is.function(basecol)) basecol<-basecol(model.frame(design))
+ symbols(X,Y,circles=sqrt(W),inches=inches,fg=basecol,...)
+ },
+ hex={
+ ## CRAN will be happier if we stop supporting the old version of hexbin
+ ## new version
+ rval<-hexbin::hexbin(X,Y,IDs=TRUE,xbins=xbins)
+ cell<-rval at cID
+ rval at count<-as.vector(tapply(W,cell,sum))
+ rval at xcm<-as.vector(tapply(1:length(X), cell,
+ function(ii) weighted.mean(X[ii],W[ii])))
+ rval at ycm<-as.vector(tapply(1:length(Y), cell,
+ function(ii) weighted.mean(Y[ii],W[ii])))
+ hexbin::gplot.hexbin(rval, legend=legend, style="centroids",...)
+
+
+ },
+ grayhex={
+ ## new version
+ rval<-hexbin::hexbin(X,Y,IDs=TRUE,xbins=xbins)
+ cell<-rval at cID
+ rval at count<-as.vector(tapply(W,cell,sum))
+ hexbin::gplot.hexbin(rval, legend=legend,...)
+
+ },
+ subsample={
+ index<-sample(length(X),sample.size,replace=TRUE, prob=W)
+ if (is.numeric(X))
+ xs<-jitter(X[index],factor=3,amount=amount$x)
+ else
+ xs<-X[index]
+ if (is.numeric(Y))
+ ys<-jitter(Y[index],factor=3,amount=amount$y)
+ else
+ ys<-Y[index]
+ plot(xs,ys,...)
+ },
+ transparent={
+ transcol<-function(base,opacity){
+ rgbs<-col2rgb(base)/255
+ rgb(rgbs[1,],rgbs[2,], rgbs[3,], alpha=opacity)
+ }
+ if(is.function(basecol)) basecol<-basecol(model.frame(design))
+ w<-weights(design)
+ maxw<-max(w)
+ minw<-0
+ alphas<- (alpha[1]*(maxw-w)+alpha[2]*(w-minw))/(maxw-minw)
+ plot(X,Y,col=transcol(basecol,alphas),...)
+ })
+
+}
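+
+## Sketch (hypothetical design): sampling weights drive symbol area, hex
+## counts, subsampling probability, or opacity, depending on `style`.
+## svyplot(api00~api99, dclus1, style="bubble")
+## svyplot(api00~api99, dclus1, style="transparent", pch=19)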
+
+svyboxplot<-function(formula, design,all.outliers=FALSE,...) UseMethod("svyboxplot",design)
+svyboxplot.default<-function(formula, design, all.outliers=FALSE,col=NULL,names,...){
+
+ formula<-as.formula(formula)
+ if(length(formula)!=3) stop("need a two-sided formula")
+ ##if(length(formula[[3]])>2) stop("only one rhs variable allowed")
+
+ outcome<-eval(bquote(~.(formula[[2]])))
+ outcome.values<-model.frame(outcome, model.frame(design),na.action=na.pass)
+
+ if (length(attr(terms(formula),"term.labels"))){
+ groups<-eval(bquote(~.(formula[[3]])))
+ qs <- svyby(outcome,groups,design,svyquantile,ci=FALSE,
+ keep.var=FALSE,
+ quantiles=c(0,0.25,0.5,0.75,1),na.rm=TRUE)
+ group.values<-model.frame(groups, model.frame(design),na.action=na.pass)[[1]]
+ n<-NCOL(qs)
+ iqr<- qs[,n-1]-qs[,n-3]
+ low<-pmax(qs[,n-4],qs[,n-3]-1.5*iqr)
+ hi<-pmin(qs[,n],qs[,n-1]+1.5*iqr)
+ stats<-t(as.matrix(cbind(low,qs[,n-(3:1)],hi)))
+ z<-list(stats=stats,n=coef(svytotal(groups,design,na.rm=TRUE)))
+ for(i in 1:ncol(stats)){
+ out<-c(if(qs[i,n]!=hi[i]) qs[i,n],
+ if(qs[i,n-4]!=low[i])qs[i,n-4])
+ if (all.outliers){
+ outlo<-sort(outcome.values[!is.na(outcome.values) & (as.numeric(group.values) %in% i) & outcome.values<low[i] ])
+ outhi<-sort(outcome.values[!is.na(outcome.values) & (as.numeric(group.values) %in% i) & outcome.values>hi[i] ])
+ out<-na.omit(unique(c(outlo,outhi)))
+ }
+ z$out<-c(z$out,out)
+ z$group<-c(z$group,rep(i,length(out)))
+ z$names<-as.character(qs[,1])
+ }
+ } else {
+ qs<-svyquantile(outcome,design,ci=FALSE,
+ quantiles=c(0,0.25,0.5,0.75,1),na.rm=TRUE)
+ iqr<-qs[4]-qs[2]
+ z<-list(stats=matrix(c(max(qs[1],qs[2]-1.5*iqr),
+ qs[2:4],min(qs[5],qs[4]+1.5*iqr))),
+ n=sum(weights(design,"sampling")))
+ z$out<-c(if(qs[5]!=z$stats[5]) qs[5],
+ if(qs[1]!=z$stats[1]) qs[1])
+ if (all.outliers){
+ outlo<-sort(outcome.values[!is.na(outcome.values) & outcome.values<qs[2]-1.5*iqr ])
+ outhi<-sort(outcome.values[!is.na(outcome.values) & outcome.values>qs[4]+1.5*iqr])
+ z$out<-na.omit(unique(c(outlo,outhi)))
+ }
+ z$group<-rep(1,length(z$out))
+ }
+ if (is.null(col)) col<-par("bg")
+ if (!missing(names)) z$names<-names
+ bxp(z,boxfill=col,...)
+}
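+
+## Sketch (hypothetical design): grouped weighted boxplots; with
+## all.outliers=TRUE every point beyond the whiskers is drawn.
+## svyboxplot(enroll~stype, dclus1, all.outliers=TRUE)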
+
+
+
+svycoplot<-function(formula, design, style=c("hexbin","transparent"),
+ basecol="black",alpha=c(0,0.8),hexscale=c("relative","absolute"),...) UseMethod("svycoplot",design)
+svycoplot.default<-function(formula, design, style=c("hexbin","transparent"),
+ basecol="black",alpha=c(0,0.8),hexscale=c("relative","absolute"),xbins=15,...){
+ style<-match.arg(style)
+ wt<-weights(design,"sampling")
+
+ switch(style,
+ hexbin={
+ hexscale<-match.arg(hexscale)
+ xyplot(formula, data=model.frame(design), xbins=xbins,
+ panel=function(x,y,style="centroids",xbins,subscripts,...) {
+ if (!length(x)) return(panel.xyplot(x,y,...))
+ vp<-current.viewport()
+ wd<-convertWidth(vp$width,unitTo="cm",valueOnly=TRUE)
+ ht<-convertHeight(vp$height,unitTo="cm",valueOnly=TRUE)
+ W<-wt[subscripts]
+ rval<-hexbin::hexbin(x,y,IDs=TRUE,xbins=xbins,shape=ht/wd,xbnds=vp$xscale,ybnds=vp$yscale)
+ cell<-rval at cID
+ rval at count<-as.vector(tapply(W,cell,sum))
+ rval at xcm<-as.vector(tapply(1:length(x), cell,
+ function(ii) weighted.mean(x[ii],W[ii])))
+ rval at ycm<-as.vector(tapply(1:length(y), cell,
+ function(ii) weighted.mean(x[ii],W[ii])))
+ hexbin::grid.hexagons(rval,style=style, maxarea=switch(hexscale, relative=0.8,
+ absolute=0.8*sum(W)/sum(wt)))
+ },...)
+ }, transparent={
+ if(is.function(basecol)) basecol<-basecol(model.frame(design))
+ transcol<-function(base,opacity){
+ rgbs<-col2rgb(base)/255
+ rgb(rgbs[1,],rgbs[2,], rgbs[3,], alpha=opacity)
+ }
+ maxw<-max(wt)
+ minw<-0
+ alphas<- (alpha[1]*(maxw-wt)+alpha[2]*(wt-minw))/(maxw-minw)
+ cols<-transcol(basecol,alphas)
+ xyplot(formula, data=model.frame(design),
+ panel=function(x,y,basecol="black",subscripts,...) {
+ a<-alphas[subscripts]
+ panel.xyplot(x,y,col=cols[subscripts],pch=19,...)
+ },...)
+ }
+ )
+}
+
+
+barplot.svystat<-function(height,...) barplot(coef(height),...)
+barplot.svrepstat<-function(height,...) barplot(coef(height),...)
+
+plot.svystat<-function(x,...) barplot(coef(x),...)
+plot.svrepstat<-function(x,...) barplot(coef(x),...)
+
+barplot.svyby<-function(height,beside=TRUE,...){
+ aa <- attr(height, "svyby")
+ rval <- height[, max(aa$margins) + (1:aa$nstats)]
+ if (is.null(dim(rval))) {
+ if (length(aa$margins)<2){
+ names(rval) <- row.names(height)
+ } else {
+ rval<-matrix(rval, nrow=length(unique(height[,aa$margins[1]])))
+ rownames(rval) <- unique(height[,aa$margins[1]])
+ colnames(rval)<-levels(do.call(interaction, height[,aa$margins[-1],drop=FALSE]))
+ }
+ } else {
+ rval <- as.matrix(rval)
+ colnames(rval)<-sub("statistics\\.","",colnames(rval))
+ rval<-t(rval)
+ }
+ barplot(rval,beside=beside,...)
+}
+
+plot.svyby<-function(x,...) barplot.svyby(x,...)
+
+
+dotchart.default<-graphics::dotchart
+dotchart<-function(x,...,pch=19) UseMethod("dotchart")
+dotchart.svystat<-function(x,...,pch=19) dotchart(coef(x),...,pch=pch)
+dotchart.svrepstat<-function(x,...,pch=19) dotchart(coef(x),...,pch=pch)
+
+dotchart.svyby<-function(x,...,pch=19){
+ height<-x
+ aa <- attr(height, "svyby")
+ rval <- height[, max(aa$margins) + (1:aa$nstats)]
+ if (is.null(dim(rval))) {
+ if (length(aa$margins)<2){
+ names(rval) <- row.names(height)
+ } else {
+ rval<-matrix(rval, nrow=length(unique(height[,aa$margins[1]])))
+ rownames(rval) <- unique(height[,aa$margins[1]])
+ colnames(rval)<-levels(do.call(interaction, height[,aa$margins[-1],drop=FALSE]))
+ }
+ } else {
+ rval <- as.matrix(rval)
+ colnames(rval)<-sub("statistics\\.","",colnames(rval))
+ rval<-t(rval)
+ }
+ dotchart(rval,...,pch=pch)
+}
diff --git a/R/surveyrep.R b/R/surveyrep.R
new file mode 100755
index 0000000..2642d63
--- /dev/null
+++ b/R/surveyrep.R
@@ -0,0 +1,2149 @@
+
+
+hadamard.doubler<-function(H){
+ rbind(cbind(H,H),cbind(H,1-H))
+ }
+
+hadamard<- function(n){
+ m<-n-(n %% 4)
+ ## hadamard.list, hadamard.sizes in sysdata.rda
+ precooked<- which(m < hadamard.sizes & m+4 >=hadamard.sizes)
+ if (length(precooked))
+ return(hadamard.list[[min(precooked)]])
+ if (all(m<hadamard.sizes))
+ return(hadamard.list[[1]])
+
+ sizes<-hadamard.sizes*2^pmax(0,ceiling(log((n+1)/hadamard.sizes,2)) )
+ bestfit<- which.min(sizes-n)
+ H<-NULL
+ if (sizes[bestfit]-n >4)
+ H<-paley(n,sizes[bestfit])
+ if (is.null(H)){
+ ndoubles<-ceiling(log(sizes/hadamard.sizes, 2))[bestfit]
+ H<-hadamard.list[[bestfit]]
+ for(i in seq(length=ndoubles))
+ H<-hadamard.doubler(H)
+ }
+ H
+ }
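+
+## Sketch: hadamard(n) returns a Hadamard matrix with more than n rows,
+## taken from a stored list, doubled, or built by a Paley construction.
+## H <- hadamard(7)
+## is.hadamard(H)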
+
+
+
+jk1weights<-function(psu, fpc=NULL,
+ fpctype=c("population","fraction","correction"),
+ compress=TRUE){
+ fpctype<-match.arg(fpctype)
+ unq<-unique(psu)
+ n<-length(unq)
+ if (is.null(fpc))
+ fpc<-1
+ else {
+ fpc<-unique(fpc)
+ if (length(fpc)>1) stop("More than one fpc value given")
+ if (fpc<0) stop("Negative finite population correction")
+ if (fpctype=="population" && fpc<n) stop("Population size smaller than sample size. No can do.")
+ fpc <-switch(fpctype, population=(fpc-n)/fpc, fraction=1-fpc, correction=fpc)
+ }
+
+ if (compress){
+ if(fpc==0 && getOption("survey.drop.replicates")) ## exhaustively sampled strata do not need replicates.
+ repweights<-matrix(ncol=0,nrow=length(psu))
+ else {
+ repweights<-matrix(n/(n-1),n,n)
+ diag(repweights)<-0
+ }
+ repweights<-list(weights=repweights, index=match(psu,unq))
+ class(repweights)<-c("repweights_compressed","repweights")
+ rval<-list(type="jk1", repweights=repweights,scale=fpc*(n-1)/n)
+ rval
+ } else {
+ if(fpc==0 && getOption("survey.drop.replicates")) ## exhaustively sampled strata do not need replicates.
+ return(list(type="jk1",repweights=matrix(ncol=0,nrow=length(psu)), scale=0))
+ repweights<-outer(psu, unq, "!=")*n/(n-1)
+ class(repweights)<-"repweights"
+ rval<-list(type="jk1", repweights=repweights,scale=(fpc*(n-1)/n))
+ rval
+ }
+}
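+
+## Sketch (hypothetical psu vector): delete-one-PSU jackknife weights with a
+## finite population correction given as the population size.
+## w <- jk1weights(psu=df$psuid, fpc=1200, fpctype="population")
+## str(w$repweights)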
+
+
+
+
+jknweights<-function(strata,psu, fpc=NULL,
+ fpctype=c("population","fraction","correction"),
+ compress=TRUE, lonely.psu=getOption("survey.lonely.psu")){
+
+ sunq<-unique(strata)
+ unq<-unique(psu)
+ nstrat<-length(sunq)
+ n<-length(strata)
+
+ lonely.psu<-match.arg(lonely.psu, c("fail","certainty","remove","adjust","average"))
+
+ fpctype<-match.arg(fpctype)
+
+ if (is.null(fpc)){
+ fpc<-rep(1,nstrat)
+ names(fpc)<-as.character(sunq)
+ fpctype<-"correction"
+ } else if (length(fpc)==n){
+ if (length(unique(fpc))>nstrat)
+ stop("More distinct fpc values than strata")
+ fpc<-sapply(sunq, function(ss) fpc[match(ss,strata)])
+ names(fpc)<-as.character(sunq)
+ } else if (length(fpc)==1) {
+ fpc<-rep(fpc,nstrat)
+ names(fpc)<-as.character(sunq)
+ } else if (length(fpc)==nstrat){
+ nn<-names(fpc)
+ if (is.null(nn)) names(fpc)<-as.character(sunq)
+ if (!all(names(fpc) %in% as.character(sunq)))
+ stop("fpc has names that do not match the stratum identifiers")
+ }
+
+
+ if (compress){
+ repweights<-matrix(1,ncol=length(unq),nrow=length(unq))
+ } else {
+ repweights<-matrix(1,ncol=length(unq), nrow=length(psu))
+ }
+ counter<-0
+ rscales<-numeric(length(unq))
+
+ for(ss in as.character(sunq)){
+ thisfpc<-fpc[match(ss,names(fpc))]
+ theseweights<-jk1weights(psu[strata %in% ss], fpc=thisfpc,
+ fpctype=fpctype,compress=compress)
+ nc<-if (compress) NCOL(theseweights$repweights$weights) else NCOL(theseweights$repweights)
+ if (nc==1 && thisfpc!=0){
+ ## lonely PSUs
+ if (lonely.psu=="fail")
+ stop("Stratum",ss,"has only one PSU")
+ if (lonely.psu=="remove")
+ next
+ if (lonely.psu=="certainty")
+ next
+ if (lonely.psu=="average")
+ next
+ if (lonely.psu=="adjust"){
+ nc<-1
+ if (compress)
+ repweights[, counter+nc]<-ifelse(strata[!duplicated(psu)] %in% ss, 0, nstrat/(nstrat-1))
+ else
+ repweights[ counter+nc]<-ifelse(strata %in% ss, 0, nstrat/(nstrat-1))
+ rscales[counter+nc]<-(nstrat-1)/nstrat
+ counter<-counter+nc
+ next
+ }
+ }
+
+ if (compress)
+ repweights[strata[!duplicated(psu)] %in% ss,counter+seq(length=nc)]<-theseweights$repweights$weights
+ else
+ repweights[strata %in% ss, counter+seq(length=nc)]<-theseweights$repweights
+
+ rscales[counter+seq(length=nc)]<-theseweights$scale
+ counter<-counter+nc
+ }
+ if (counter==0) stop("All strata were exhaustively sampled: you have the whole population")
+ scale<-1
+ if (counter<length(unq)){
+ repweights<-repweights[,1:counter]
+ rscales<-rscales[1:counter]
+ if (lonely.psu=="average")
+ scale<-scale*length(unq)/counter
+ }
+ if (compress){
+ repweights<-list(weights=repweights,index=match(psu,unq))
+ class(repweights)<- c("repweights_compressed","repweights")
+ } else class(repweights)<-"repweights"
+ list(type="jkn", repweights=repweights, rscales=rscales, scale=scale)
+}
+
+
+
+brrweights<-function(strata,psu, match=NULL, small=c("fail","split","merge"),
+ large=c("split","merge","fail"), fay.rho=0,
+ only.weights=FALSE, compress=TRUE,
+ hadamard.matrix=NULL){
+
+ small<-match.arg(small)
+ large<-match.arg(large)
+
+ strata<-as.character(strata)
+
+ ssize<-table(strata[!duplicated(psu)])
+ if (any(ssize<2) && small=="fail")
+ stop("Some strata have fewer than 2 PSUs")
+ if (any(ssize>2) && large=="fail")
+ stop("Some strata have more than 2 PSUs")
+
+ unq<-which(!duplicated(psu))
+ sunq<-strata[unq]
+ psunq<-psu[unq]
+ weights<-matrix(ncol=2,nrow=length(unq))
+ weightstrata<-numeric(length(unq))
+
+ if (length(match)==length(strata))
+ match<-match[unq]
+ if (is.null(match))
+ match<-unq ## default is to match by dataset order
+ oo<-order(sunq,match)
+
+ upto <- 0
+
+ if(any(ssize==1)){
+ smallstrata<-names(ssize)[ssize==1]
+ if(small=="split"){
+ weights[sunq %in% smallstrata,1]<- 0.5
+ weights[sunq %in% smallstrata,2]<- 0.5
+ weightstrata[sunq %in% smallstrata]<-1:length(smallstrata)
+ upto<-length(smallstrata)
+ } else {
+ ##small=="merge"
+ if (length(smallstrata) > 1){
+ weights[oo,][sunq[oo] %in% smallstrata, 1]<-rep(0:1,length.out=length(smallstrata))
+ weights[oo,][sunq[oo] %in% smallstrata, 2]<-rep(1:0,length.out=length(smallstrata))
+ if(length(smallstrata) %% 2==0)
+ weightstrata[oo][sunq[oo] %in% smallstrata]<-rep(1:(length(smallstrata) %/%2), 2)
+ else
+ weightstrata[oo][sunq[oo] %in% smallstrata]<-c(1,rep(1:(length(smallstrata) %/%2), 2))
+ upto<-length(smallstrata) %/% 2
+ } else stop("Can't merge with a single small stratum")
+ }
+ }
+
+ if (any(ssize>2)){
+ largestrata<-names(ssize)[ssize>2]
+ if (large=="split"){
+ if (any(ssize[largestrata] %%2 ==1))
+ stop("Can't split with odd numbers of PSUs in a stratum")
+ ## make substrata of size 2
+ for(ss in largestrata){
+ weights[oo,][sunq[oo] %in% ss, 1]<-rep(0:1,length.out=ssize[ss])
+ weights[oo,][sunq[oo] %in% ss, 2]<-rep(1:0,length.out=ssize[ss])
+ weightstrata[oo][sunq[oo] %in% ss]<-upto+rep(1:(ssize[ss] %/%2),each=2)
+ upto<-upto+(ssize[ss] %/% 2)
+ }
+ } else {
+ ## make two substrata.
+ halfsize<-ssize[largestrata] %/%2
+ otherhalfsize<-ssize[largestrata] - halfsize
+ reps<-as.vector(rbind(halfsize,otherhalfsize))
+ nlarge<-length(halfsize)
+ weights[oo,][sunq[oo] %in% largestrata, 1]<-rep(rep(0:1,nlarge),reps)
+ weights[oo,][sunq[oo] %in% largestrata, 2]<-rep(rep(1:0,nlarge),reps)
+ weightstrata[oo][sunq[oo] %in% largestrata]<-upto+rep(1:length(largestrata),ssize[largestrata])
+ upto<-upto+length(largestrata)
+ }
+ }
+ if(any(ssize==2)){
+ goodstrata<-names(ssize)[ssize==2]
+ weights[oo,][sunq[oo] %in% goodstrata, 1]<-rep(0:1,length(goodstrata))
+ weights[oo,][sunq[oo] %in% goodstrata, 2]<-rep(1:0,length(goodstrata))
+ weightstrata[oo][sunq[oo] %in% goodstrata]<-upto+rep(1:length(goodstrata),each=2)
+ upto<-upto+length(goodstrata)
+ }
+
+
+ if (is.null(hadamard.matrix)){
+ H<-hadamard(upto)
+ } else {
+ ## the user supplied hadamard.matrix
+      ## Check that it is a binary matrix and satisfies the
+ ## Hadamard determinant property
+ if (!is.matrix(hadamard.matrix) || nrow(hadamard.matrix)<upto+1)
+ stop("hadamard.matrix must be a matrix of dimension at least nstrata+1")
+ values<-unique(as.vector(hadamard.matrix))
+ if(length(values)!=2)
+ stop("hadamard.matrix has more than two different values")
+ H<-ifelse(hadamard.matrix==values[1],0,1)
+ if(!is.hadamard(H,full.orthogonal.balance=FALSE))
+ stop("hadamard.matrix is not a Hadamard matrix")
+ }
+ ii<-1:upto
+ jj<-1:length(weightstrata)
+ sampler<-function(i){
+ h<-H[1+ii, i]+1
+ col<-h[match(weightstrata,ii)]
+ wa<-weights[cbind(jj,col)]
+ wb<-weights[cbind(jj,3-col)]
+ if (compress)
+ wa*(2-fay.rho)+wb*fay.rho
+ else
+ wa[match(psu,psunq)]*(2-fay.rho)+wb[match(psu,psunq)]*fay.rho
+ }
+
+ if (only.weights){
+ repweights<-sapply(1:NCOL(H),sampler)
+ if (!compress){
+ class(repweights)<-"repweights"
+ return(repweights)
+ }
+ repweights<-list(weights=repweights,index=match(psu,psunq))
+ class(repweights)<-c("repweights_compressed","repweights")
+ repweights
+ } else
+ list(weights=weights, wstrata=weightstrata, strata=sunq, psu=psunq,
+ npairs=NCOL(H),sampler=sampler,compress=compress)
+
+}
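+
+## Sketch (hypothetical stratum/psu vectors, two PSUs per stratum in the
+## simplest case): half-sample replicate weights from a Hadamard matrix.
+## w <- brrweights(strata=df$stratum, psu=df$psuid, only.weights=TRUE)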
+
+
+
+
+
+##
+## Designs with replication weights rather than survey structure.
+##
+
+as.svrepdesign<- function(design,type=c("auto","JK1","JKn","BRR","bootstrap","subbootstrap","mrbbootstrap","Fay"),
+ fay.rho=0, fpc=NULL, fpctype=NULL,...,compress=TRUE, mse=getOption("survey.replicates.mse")){
+
+ type<-match.arg(type)
+
+ if (type=="auto"){
+ if (!design$has.strata)
+ type<-"JK1"
+ else
+ type<-"JKn"
+ }
+ selfrep<-NULL
+ if (type=="JK1" && design$has.strata)
+ stop("Can't use JK1 for a stratified design")
+ if (type %in% c("JKn","BRR","Fay") && !design$has.strata)
+ stop("Must use JK1 or bootstrap for an unstratified design")
+
+ if (is.null(fpc)) {
+ fpctype<-"population"
+
+ if (is.null(design$fpc) ||
+ (inherits(design, "survey.design2") && is.null(design$fpc$popsize))){
+ fpc<-NULL
+ } else if (type %in% c("Fay","BRR")){
+ warning("Finite population correction dropped in conversion")
+ } else {
+ if (inherits(design,"survey.design2")){
+ fpc<-design$fpc$popsize
+ if(NCOL(fpc)>1 && type!="mrbbootstrap"){
+ fpc<-fpc[,1]
+ warning("Finite population corrections after first stage have been dropped")
+ }
+ if (getOption("survey.drop.replicates")){
+ selfrep<-design$fpc$popsize[,1]==design$fpc$sampsize[,1]
+ }
+ } else{
+ fpc<-design$fpc[,2]
+ names(fpc)<-design$fpc[,1]
+ }
+ }
+ } else {
+ if (type %in% c("Fay","BRR","subbootstrap")) stop(paste("fpc information cannot be used for type=",type))
+ if (is.null(fpctype)) stop("fpctype must be supplied if fpc is supplied")
+ }
+ if (type=="JK1"){
+ ##JK1
+ r<-jk1weights(design$cluster[,1], fpc=fpc,fpctype=fpctype, compress=compress)
+ repweights<-r$repweights
+ scale<-drop(r$scale)
+ if (inherits(repweights,"repweights_compressed"))
+ rscales<-rep(1, NCOL(repweights$weights))
+ else
+ rscales<-rep(1, NCOL(repweights))
+
+ type<-"JK1"
+ pweights<-1/design$prob
+ } else if (type %in% c("BRR","Fay")){
+ ##BRR
+ if (inherits(design,"survey.design2"))
+ repweights<-brrweights(design$strata[,1], design$cluster[,1],...,fay.rho=fay.rho,
+ compress=compress,only.weights=TRUE)
+ else
+ repweights<-brrweights(design$strata, design$cluster[,1],...,fay.rho=fay.rho,
+ compress=compress,only.weights=TRUE)
+ pweights<-1/design$prob
+ if (length(pweights)==1)
+ pweights<-rep(pweights, NROW(design$variables))
+
+ if (fay.rho==0)
+ type<-"BRR"
+ else
+ type<-"Fay"
+
+ rscales<-rep(1,ncol(repweights))
+ scale<-1/(ncol(repweights)*(1-fay.rho)^2)
+
+ } else if (type=="JKn"){
+ ##JKn
+ if (inherits(design,"survey.design2"))
+ r<-jknweights(design$strata[,1],design$cluster[,1], fpc=fpc,
+ fpctype=fpctype, compress=compress)
+ else
+ r<-jknweights(design$strata,design$cluster[,1], fpc=fpc,
+ fpctype=fpctype, compress=compress)
+ pweights<-1/design$prob
+ repweights<-r$repweights
+ scale<-r$scale
+ rscales<-r$rscales
+ } else if (type=="bootstrap"){
+ ##bootstrap
+ if (inherits(design,"survey.design2"))
+ r<-bootweights(design$strata[,1],design$cluster[,1], fpc=fpc,
+ fpctype=fpctype, compress=compress,...)
+ else
+ r<-bootweights(design$strata,design$cluster[,1], fpc=fpc,
+ fpctype=fpctype, compress=compress,...)
+ pweights<-1/design$prob
+ repweights<-r$repweights
+ scale<-r$scale
+ rscales<-r$rscales
+ }else if (type=="subbootstrap"){
+ ##bootstrap
+ if (inherits(design,"survey.design2"))
+ r<-subbootweights(design$strata[,1],design$cluster[,1],compress=compress,...)
+ else
+ r<-subbootweights(design$strata,design$cluster[,1],compress=compress,...)
+ pweights<-1/design$prob
+ repweights<-r$repweights
+ scale<-r$scale
+ rscales<-r$rscales
+ } else if (type=="mrbbootstrap"){
+ if (inherits(design,"survey.design2"))
+ r<-mrbweights(design$cluster,design$strata,design$fpc,...)
+ else
+ stop("MRB bootstrap not available for obsolete svydesign objects")
+ pweights<-1/design$prob
+ repweights<-r$repweights
+ scale<-r$scale
+ rscales<-r$rscales
+ } else stop("Can't happen")
+
+ rval<-list(repweights=repweights, pweights=pweights,
+ type=type, rho=fay.rho,scale=scale, rscales=rscales,
+ call=sys.call(), combined.weights=FALSE, selfrep=selfrep,mse=mse)
+ rval$variables <- design$variables
+ class(rval)<-"svyrep.design"
+ rval$degf<-degf(rval)
+ rval
+}
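+
+## Usage sketch, assuming the api example data shipped with the package:
+##   data(api)
+##   dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+##   rclus1 <- as.svrepdesign(dclus1)  ## unstratified, so "auto" gives JK1
+##   bclus1 <- as.svrepdesign(dclus1, type="bootstrap", replicates=100)
+##   svymean(~api00, rclus1)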
+
+
+
+svrepdesign<-function(variables, repweights, weights,data=NULL,...) UseMethod("svrepdesign",data)
+
+svrepdesign.default<-function(variables=NULL,repweights=NULL, weights=NULL,
+ data=NULL,type=c("BRR","Fay","JK1", "JKn","bootstrap","other"),
+ combined.weights=TRUE, rho=NULL, bootstrap.average=NULL,
+ scale=NULL,rscales=NULL,fpc=NULL, fpctype=c("fraction","correction"),
+ mse=getOption("survey.replicates.mse"),...)
+{
+
+ type<-match.arg(type)
+
+ if(type=="Fay" && is.null(rho))
+ stop("With type='Fay' you must supply the correct rho")
+
+ if (type %in% c("JK1","JKn") && !is.null(rho))
+ warning("rho not relevant to JK1 design: ignored.")
+
+ if (type %in% c("other") && !is.null(rho))
+ warning("rho ignored.")
+
+
+ if(is.null(variables))
+ variables<-data
+
+ if(inherits(variables,"formula")){
+ mf<-substitute(model.frame(variables, data=data,na.action=na.pass))
+ variables<-eval.parent(mf)
+ }
+
+ if(inherits(repweights,"formula")){
+ mf<-substitute(model.frame(repweights, data=data))
+ repweights<-eval.parent(mf)
+ repweights<-na.fail(repweights)
+ }
+
+ if(is.character(repweights)){##regular expression
+ wtcols<-grep(repweights,names(data))
+ repweights<-data[,wtcols]
+ }
+
+ if (is.null(repweights))
+ stop("You must provide replication weights")
+
+
+ if(inherits(weights,"formula")){
+ mf<-substitute(model.frame(weights, data=data))
+ weights<-eval.parent(mf)
+ weights<-drop(as.matrix(na.fail(weights)))
+ }
+
+ if (is.null(weights)){
+ warning("No sampling weights provided: equal probability assumed")
+ weights<-rep(1,NROW(repweights))
+ }
+
+ repwtmn<-mean(apply(repweights,2,mean))
+ wtmn<-mean(weights)
+ probably.combined.weights<-(repwtmn>5) & (wtmn/repwtmn<5)
+ probably.not.combined.weights<-(repwtmn<5) & (wtmn/repwtmn>5)
+ if (combined.weights & probably.not.combined.weights)
+ warning(paste("Data do not look like combined weights: mean replication weight is", repwtmn," and mean sampling weight is",wtmn))
+ if (!combined.weights & probably.combined.weights)
+ warning(paste("Data look like combined weights: mean replication weight is", repwtmn," and mean sampling weight is",wtmn))
+
+ if (!is.null(rscales) && !(length(rscales) %in% c(1, ncol(repweights)))){
+ stop(paste("rscales has length ",length(rscales),", should be ncol(repweights)",sep=""))
+ }
+
+ if (type == "BRR")
+ scale<-1/ncol(repweights)
+ if (type=="Fay")
+ scale <-1/(ncol(repweights)*(1-rho)^2)
+
+ if (type=="bootstrap"){
+ if(is.null(bootstrap.average))
+ bootstrap.average<-1
+ if (is.null(scale))
+ scale<-bootstrap.average/(ncol(repweights)-1)
+ if (is.null(rscales))
+ rscales<-rep(1,ncol(repweights))
+ }
+
+ if (type=="JK1" && is.null(scale)) {
+ if(!combined.weights){
+ warning("scale (n-1)/n not provided: guessing from weights")
+ scale<-1/max(repweights[,1])
+ } else {
+ probably.n<-ncol(repweights)
+ scale<-(probably.n-1)/probably.n
+ warning("scale (n-1)/n not provided: guessing n=number of replicates")
+ }
+ }
+
+ if (type =="JKn" && is.null(rscales))
+ if (!combined.weights) {
+ warning("rscales (n-1)/n not provided:guessing from weights")
+ rscales<-1/apply(repweights,2,max)
+ } else stop("Must provide rscales for combined JKn weights")
+
+ if (type=="other" && (is.null(rscales) || is.null(scale))){
+ if (is.null(rscales)) rscales<-rep(1,NCOL(repweights))
+ if (is.null(scale)) scale<-1
+ warning("scale or rscales not specified, set to 1")
+ }
+ if (is.null(rscales)) rscales<-rep(1,NCOL(repweights))
+
+ if (!is.null(fpc)){
+ if (missing(fpctype)) stop("Must specify fpctype")
+ fpctype<-match.arg(fpctype)
+ if (type %in% c("BRR","Fay")) stop("fpc not available for this type")
+ if (type %in% "bootstrap") stop("Separate fpc not needed for bootstrap")
+ if (length(fpc)!=length(rscales)) stop("fpc is wrong length")
+ if (any(fpc>1) || any(fpc<0)) stop("Illegal fpc value")
+ fpc<-switch(fpctype,correction=fpc,fraction=1-fpc)
+ rscales<-rscales*fpc
+ }
+
+
+ rval<-list(type=type, scale=scale, rscales=rscales, rho=rho,call=sys.call(),
+ combined.weights=combined.weights)
+ rval$variables<-variables
+ rval$pweights<-weights
+ if (!inherits(repweights,"repweights"))
+ class(repweights)<-"repweights"
+ rval$repweights<-repweights
+ class(rval)<-"svyrep.design"
+ rval$degf<-degf(rval)
+ rval$mse<-mse
+ rval
+
+}
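+
+## Usage sketch with externally supplied replicate weights; the BRR
+## half-sample matrix below is illustrative (cf. the scd example data):
+##   data(scd)
+##   repwts <- 2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1),
+##                     c(0,1,1,0,0,1), c(0,1,0,1,1,0))
+##   scdrep <- svrepdesign(data=scd, type="BRR", repweights=repwts,
+##                         combined.weights=FALSE)
+##   svyratio(~alive, ~arrests, scdrep)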
+
+
+print.svyrep.design<-function(x,...){
+ cat("Call: ")
+ print(x$call)
+ if (x$type=="Fay")
+ cat("Fay's variance method (rho=",x$rho,") ")
+ if (x$type=="BRR")
+ cat("Balanced Repeated Replicates ")
+ if (x$type=="JK1")
+ cat("Unstratified cluster jacknife (JK1) ")
+ if (x$type=="JKn")
+ cat("Stratified cluster jackknife (JKn) ")
+ if (x$type=="bootstrap")
+ cat("Survey bootstrap ")
+ if (x$type=="mrbbootstrap")
+ cat("Multistage rescaled bootstrap ")
+ if (x$type=="subbootstrap")
+ cat("(n-1) bootstrap ")
+ nweights<-ncol(x$repweights)
+ cat("with", nweights,"replicates")
+ if (!is.null(x$mse) && x$mse) cat(" and MSE variances")
+ cat(".\n")
+ invisible(x)
+}
+
+summary.svyrep.design<-function(object,...){
+ class(object)<-c("summary.svyrep.design", class(object))
+ object
+}
+
+print.summary.svyrep.design<-function(x,...){
+ class(x)<-class(x)[-1]
+ print(x)
+ cat("Variables: \n")
+ print(colnames(x))
+}
+
+
+
+image.svyrep.design<-function(x, ..., col=grey(seq(.5,1,length=30)),
+ type.=c("rep","total")){
+ type<-match.arg(type.)
+ m<-as.matrix(x$repweights)
+ if (type=="total"){
+ m<-m*x$pweights
+ }
+
+ image(1:NCOL(m), 1:NROW(m), t(m), col=col, xlab="Replicate", ylab="Observation",...)
+ invisible(NULL)
+}
+
+"[.svyrep.design"<-function(x, i, j, drop=FALSE){
+ if (!missing(i)){
+ pwt<-x$pweights
+ if (is.data.frame(pwt)) pwt<-pwt[[1]]
+ x$pweights<-pwt[i]
+ x$repweights<-x$repweights[i,,drop=FALSE]
+ if(!is.null(x$selfrep))
+ x$selfrep<-x$selfrep[i]
+ if (!missing(j))
+ x$variables<-x$variables[i,j, drop=FALSE]
+ else
+ x$variables<-x$variables[i,,drop=FALSE]
+ x$degf<-NULL
+ x$degf<-degf(x)
+ } else {
+ x$variables<-x$variables[,j,drop=FALSE]
+ }
+ x
+}
+
+
+subset.svyrep.design<-function(x,subset,...){
+ e <- substitute(subset)
+ r <- eval(e, x$variables, parent.frame())
+ r <- r & !is.na(r)
+ x<-x[r,]
+ x$call<-sys.call(-1)
+ x
+}
+
+update.svyrep.design<-function(object,...){
+
+ dots<-substitute(list(...))[-1]
+ newnames<-names(dots)
+
+ for(j in seq(along=dots)){
+ object$variables[,newnames[j]]<-eval(dots[[j]],object$variables, parent.frame())
+ }
+
+ object$call<-sys.call(-1)
+ object
+}
+
+weights.svyrep.design<-function(object,type=c("replication","sampling","analysis"),...){
+ type<-match.arg(type)
+ switch(type,
+ replication= as.matrix(object$repweights),
+ sampling=object$pweights,
+ analysis=if(object$combined.weights) as.matrix(object$repweights) else as.matrix(object$repweights)*object$pweights)
+}
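+
+## The three weight types, sketched for a replicate design 'rclus1':
+##   weights(rclus1, "sampling")     ## one sampling weight per observation
+##   weights(rclus1, "replication")  ## matrix, one column per replicate
+##   weights(rclus1, "analysis")     ## replication weights multiplied by the
+##                                   ## sampling weights, unless already combined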
+
+weights.survey.design<-function(object,...){
+ return(1/object$prob)
+}
+
+
+svyquantile.svyrep.design<-svrepquantile<-function(x,design,quantiles,method="linear",
+ interval.type=c("probability","quantile"),f=1,
+ return.replicates=FALSE,
+ ties=c("discrete","rounded"),na.rm=FALSE,...){
+
+ if (!exists(".Generic",inherits=FALSE))
+ .Deprecated("svyquantile")
+
+ ties<-match.arg(ties)
+ interval<-match.arg(interval.type)
+ if (design$type %in% c("JK1","JKn") && interval=="quantile")
+ warning("Jackknife replicate weights may not give valid standard errors for quantiles")
+ if (design$type %in% "other" && interval=="quantile")
+ warning("Not all replicate weight designs give valid standard errors for quantiles.")
+ if (inherits(x,"formula"))
+ x<-model.frame(x,design$variables,na.action=if(na.rm) na.pass else na.fail)
+ else if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ if (length(nas)>length(design$prob))
+ x<-x[nas==0,,drop=FALSE]
+ else
+ x[nas>0,]<-0
+ }
+
+ if (NROW(x)<=1){
+ rval<-matrix(rep(as.matrix(x),length(quantiles)),ncol=NCOL(x),nrow=length(quantiles),byrow=TRUE)
+ dimnames(rval)<-list(paste("q",round(quantiles,2),sep=""), names(x))
+ if (getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep))
+ vv<-matrix(0,ncol=NCOL(x),nrow=length(quantiles))
+ else
+ vv<-matrix(NA,ncol=NCOL(x),nrow=length(quantiles))
+ dimnames(vv)<-list(paste("q",round(quantiles,2),sep=""), names(x))
+ attr(rval,"var")<-vv
+ attr(rval,"statistic")<-quantiles
+ if (return.replicates)
+ rval<-list(mean=rval,replicates=NULL)
+ class(rval)<-"svrepstat"
+ return(rval)
+ }
+
+
+ w<-weights(design,"analysis")
+
+ if (interval=="quantile"){
+ ## interval on quantile scale
+ if (ties=="discrete")
+ computeQuantiles<-function(xx){
+ oo<-order(xx)
+
+ ws<-weights(design,"sampling")
+ cum.ws<-cumsum(ws[oo])/sum(ws)
+ rval<-approx(cum.ws,xx[oo],method=method,f=f,
+ yleft=min(xx),yright=max(xx),
+ xout=quantiles,ties=min)$y
+
+ cum.w<-apply(w,2,function(wi) cumsum(wi[oo])/sum(wi))
+
+ qq<-apply(cum.w, 2,function(cum.wi) approx(cum.wi,xx[oo],method=method,f=f,
+ yleft=min(xx),yright=max(xx),
+ xout=quantiles,ties=min)$y)
+ if (length(quantiles)>1)
+ qq<-t(qq)
+ else
+ qq<-as.matrix(qq)
+ ##rval<-colMeans(qq)
+
+ rval<-list(quantiles=rval,
+ variances=diag(as.matrix(svrVar(qq,design$scale,design$rscales,mse=design$mse,coef=rval))))
+ if (return.replicates)
+ rval<-c(rval, list(replicates=qq))
+ rval
+ } else { ##ties="rounded"
+ computeQuantiles<-function(xx){
+ ws<-weights(design,"sampling")
+
+ wws<-rowsum(ws,xx,reorder=TRUE)
+ uxx<-sort(unique(xx))
+
+ cum.wws<-cumsum(wws)/sum(wws)
+ rval<-approx(cum.wws,uxx,method=method,f=f,
+ yleft=min(xx),yright=max(xx),
+ xout=quantiles,ties=min)$y
+
+ cum.w<-apply(rowsum(w,xx,reorder=TRUE),2,function(wi) cumsum(wi)/sum(wi))
+
+ qq<-apply(cum.w, 2,function(cum.wi) approx(cum.wi,uxx,method=method,f=f,
+ yleft=min(xx),yright=max(xx),
+ xout=quantiles,ties=min)$y)
+ if (length(quantiles)>1)
+ qq<-t(qq)
+ else
+ qq<-as.matrix(qq)
+ ##rval<-colMeans(qq)
+
+ rval<-list(quantiles=rval,
+ variances=diag(as.matrix(svrVar(qq,design$scale,design$rscales,mse=design$mse,coef=rval))))
+ if (return.replicates)
+ rval<-c(rval, list(replicates=qq))
+ rval
+ }
+ }
+ } else {
+ ## interval on probability scale, backtransformed.
+ if (ties=="discrete"){
+ computeQuantiles<-function(xx){
+ oo<-order(xx)
+ w<-weights(design,"sampling")
+ cum.w<- cumsum(w[oo])/sum(w)
+ Qf<-approxfun(cum.w,xx[oo],method=method,f=f,
+ yleft=min(xx),yright=max(xx),
+ ties=min)
+
+ point.estimates<-Qf(quantiles)
+ if(length(quantiles)==1)
+ estfun<-as.numeric(xx<point.estimates)
+ else
+ estfun<-0+outer(xx,point.estimates,"<")
+ est<-svymean(estfun,design, return.replicates=return.replicates)
+ if (return.replicates)
+ q.estimates<-matrix(Qf(est$replicates),nrow=NROW(est$replicates))
+ ci<-matrix(Qf(c(coef(est)+2*SE(est), coef(est)-2*SE(est))),ncol=2)
+ variances<-((ci[,1]-ci[,2])/4)^2
+ rval<-list(quantiles=point.estimates,
+ variances=variances)
+ if (return.replicates)
+ rval<-c(rval, list(replicates=q.estimates))
+ rval
+ }
+ } else {
+ ## ties=rounded
+ computeQuantiles<-function(xx){
+ w<-weights(design,"sampling")
+ ww<-rowsum(w,xx,reorder=TRUE)
+ uxx<-sort(unique(xx))
+ cum.w<- cumsum(ww)/sum(ww)
+ Qf<-approxfun(cum.w,uxx,method=method,f=f,
+ yleft=min(xx),yright=max(xx),
+ ties=min)
+
+ point.estimates<-Qf(quantiles)
+ if(length(quantiles)==1)
+ estfun<-as.numeric(xx<point.estimates)
+ else
+ estfun<-0+outer(xx,point.estimates,"<")
+ est<-svymean(estfun, design, return.replicates=return.replicates)
+ if (return.replicates)
+ q.estimates<-matrix(Qf(est$replicates),nrow=NROW(est$replicates))
+ ci<-matrix(Qf(c(coef(est)+2*SE(est), coef(est)-2*SE(est))),ncol=2)
+ variances<-((ci[,1]-ci[,2])/4)^2
+ rval<-list(quantiles=point.estimates,
+ variances=variances)
+ if (return.replicates)
+ rval<-c(rval, list(replicates=q.estimates))
+ rval
+ }
+
+ }
+ }
+
+ if (!is.null(dim(x)))
+ results<-apply(x,2,computeQuantiles)
+ else
+ results<-computeQuantiles(x)
+
+ rval<-matrix(sapply(results,"[[","quantiles"),ncol=NCOL(x),nrow=length(quantiles),
+ dimnames=list(paste("q",round(quantiles,2),sep=""), names(x)))
+ vv<-matrix(sapply(results,"[[","variances"),ncol=NCOL(x),nrow=length(quantiles),
+ dimnames=list(paste("q",round(quantiles,2),sep=""), names(x)))
+ attr(rval,"var")<-vv
+ attr(rval, "statistic")<-"quantiles"
+ if (return.replicates) {
+ reps<-do.call(cbind,lapply(results,"[[","replicates"))
+ attr(reps,"scale")<-design$scale
+ attr(reps,"rscales")<-design$rscales
+ attr(reps,"mse")<-design$mse
+ rval<-list(mean=rval, replicates=reps)
+ }
+ class(rval)<-"svrepstat"
+ rval
+
+}
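+
+## Usage sketch (assumes rclus1 from the as.svrepdesign sketch above):
+##   svyquantile(~api00, rclus1, quantiles=c(0.25,0.5,0.75),
+##               interval.type="probability")
+## The jackknife types warn here, since they may not give valid standard
+## errors for quantiles.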
+
+
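+## svrVar computes the replicate-weight variance estimate
+##   v = scale * sum_i rscales[i] * (theta_i - center)(theta_i - center)'
+## where center is the mean of the replicates with rscales > 0, or the
+## full-sample estimate 'coef' when mse=TRUE. Replicates that return NA
+## are dropped (with a warning) before the sum.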
+svrVar<-function(thetas, scale, rscales,na.action=getOption("na.action"),mse=getOption("survey.replicates.mse"),coef){
+ thetas<-get(na.action)(thetas)
+ naa<-attr(thetas,"na.action")
+ if (!is.null(naa)){
+ rscales<-rscales[-naa]
+ if (length(rscales))
+ warning(length(naa), " replicates gave NA results and were discarded.")
+ else
+ stop("All replicates contained NAs")
+ }
+ if (is.null(mse)) mse<-FALSE
+
+ if (length(dim(thetas))==2){
+ if (mse) {
+ meantheta<-coef
+ } else {
+ meantheta<-colMeans(thetas[rscales>0,,drop=FALSE])
+ }
+ v<-crossprod( sweep(thetas,2, meantheta,"-")*sqrt(rscales))*scale
+ } else {
+ if (mse){
+ meantheta<-coef
+ } else {
+ meantheta<-mean(thetas[rscales>0])
+ }
+ v<- sum( (thetas-meantheta)^2*rscales)*scale
+ }
+ attr(v,"na.replicates")<-naa
+ attr(v,"means")<-meantheta
+ return(v)
+}
+
+
+svyvar.svyrep.design<-svrepvar<-function(x, design, na.rm=FALSE, rho=NULL,
+ return.replicates=FALSE,...,estimate.only=FALSE){
+
+ if (!exists(".Generic",inherits=FALSE))
+ .Deprecated("svyvar")
+
+ if (inherits(x,"formula"))
+ x<-model.frame(x,design$variables,na.action=na.pass)
+ else if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+
+ wts<-design$repweights
+ scale<-design$scale
+ rscales<-design$rscales
+ if (!is.null(rho)) .NotYetUsed("rho")
+
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ if(any(nas>0)){
+ design<-design[nas==0,]
+ x<-x[nas==0,,drop=FALSE]
+ wts<-wts[nas==0,,drop=FALSE]
+ }
+ }
+
+ if (design$combined.weights)
+ pw<-1
+ else
+ pw<-design$pweights
+
+ n<-NROW(x)
+ p<-NCOL(x)
+ v<-function(w){
+ xbar<-colSums(as.vector(w)*pw*x)/sum(as.vector(w)*pw)
+ xdev<-sweep(x,2,xbar,"-")
+ x1<-matrix(rep(xdev,p),ncol=p*p)
+ x2<-xdev[,rep(1:p,each=p),drop=FALSE]
+ (n/(n-1))*colSums(x1*x2*as.vector(w)*pw)/sum(as.vector(w)*pw)
+ }
+
+ if (design$combined.weights)
+ rval<-v(design$pweights)
+ else
+ rval<-v(rep(1,length(design$pweights)))
+
+ rval<-matrix(rval, ncol=p)
+ dimnames(rval)<-list(colnames(x),colnames(x))
+ if (estimate.only) return(rval)
+
+ repvars<-apply(wts,2, v)
+
+ repvars<-drop(t(repvars))
+ attr(rval,"var")<-svrVar(repvars, scale, rscales,mse=design$mse, coef=rval)
+ attr(rval, "statistic")<-"variance"
+ if (return.replicates){
+ attr(repvars,"scale")<-design$scale
+ attr(repvars,"rscales")<-design$rscales
+ attr(repvars,"mse")<-design$mse
+ rval<-list(variance=rval, replicates=repvars)
+ }
+ class(rval)<-c("svrepvar","svrepstat")
+ rval
+
+}
+
+
+print.svrepvar<-function (x, covariance=FALSE, ...)
+{
+ if (is.list(x)) x<-x[[1]]
+ vv <- as.matrix(attr(x, "var"))
+ if (covariance){
+ nms<-outer(rownames(x),colnames(x),paste,sep=":")
+ m<-cbind(as.vector(x), sqrt(diag(vv)))
+ rownames(m)<-nms
+ } else{
+ ii <- which(diag(sqrt(length(x)))>0)
+ m <- cbind(x[ii], sqrt(diag(vv))[ii])
+ if(length(ii)==1) rownames(m)<-rownames(x)
+ }
+ colnames(m) <- c(attr(x, "statistic"), "SE")
+ printCoefmat(m)
+}
+
+as.matrix.svrepvar<-function(x,...) if (is.list(x)) unclass(x[[1]]) else unclass(x)
+
+
+svymean.svyrep.design<-svrepmean<-function(x,design, na.rm=FALSE, rho=NULL,
+ return.replicates=FALSE,deff=FALSE,...)
+{
+ if (!exists(".Generic",inherits=FALSE))
+ .Deprecated("svymean")
+ if (!inherits(design,"svyrep.design")) stop("design is not a replicate survey design")
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,design$variables,na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ } else {
+ if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+ else {
+ if(is.data.frame(x) && any(sapply(x,is.factor))){
+ xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
+ cols<-sapply(xx,NCOL)
+ scols<-c(0,cumsum(cols))
+ cn<-character(sum(cols))
+ for(i in 1:length(xx))
+ cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-cn
+ }
+ }
+ }
+
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ x<-x[nas==0,,drop=FALSE]
+ }
+
+ wts<-design$repweights
+ scale<-design$scale
+ rscales<-design$rscales
+ if (!is.null(rho)) .NotYetUsed("rho")
+
+ if (!design$combined.weights)
+ pw<-design$pweights
+ else
+ pw<-1
+
+ rval<-colSums(design$pweights*x)/sum(design$pweights)
+
+ if (getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep)){
+ v<-matrix(0,length(rval),length(rval))
+ repmeans<-NULL
+ } else {
+ if (inherits(wts, "repweights_compressed")){
+ repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts$weights))
+ for(i in 1:ncol(wts$weights)){
+ wi<-wts$weights[wts$index,i]
+ repmeans[i,]<-t(colSums(wi*x*pw)/sum(pw*wi))
+ }
+ } else {
+ repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts))
+ for(i in 1:ncol(wts)){
+ repmeans[i,]<-t(colSums(wts[,i]*x*pw)/sum(pw*wts[,i]))
+ }
+ }
+ repmeans<-drop(repmeans)
+ v <- svrVar(repmeans, scale, rscales,mse=design$mse, coef=rval)
+ }
+ attr(rval,"var")<-v
+ attr(rval, "statistic")<-"mean"
+ if (return.replicates){
+ attr(repmeans,"scale")<-design$scale
+ attr(repmeans,"rscales")<-design$rscales
+ attr(repmeans,"mse")<-design$mse
+ rval<-list(mean=rval, replicates=repmeans)
+ }
+ if (is.character(deff) || deff){
+ nobs<-length(design$pweights)
+ npop<-sum(design$pweights)
+ vsrs<-unclass(svyvar(x,design,na.rm=na.rm, return.replicates=FALSE,estimate.only=TRUE))/length(design$pweights)
+ if (deff!="replace")
+ vsrs<-vsrs*(npop-nobs)/npop
+ attr(rval,"deff") <- v/vsrs
+ }
+ class(rval)<-"svrepstat"
+ rval
+}
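+
+## Usage sketch (rclus1 as above):
+##   svymean(~api00+stype, rclus1)       ## factors expand to indicators
+##   svymean(~api00, rclus1, deff=TRUE)  ## adds a design-effect estimate
+##   svymean(~api00, rclus1, return.replicates=TRUE)$replicates
+## svytotal below follows the same pattern for population totals.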
+
+
+
+svytotal.svyrep.design<-svreptotal<-function(x,design, na.rm=FALSE, rho=NULL,
+ return.replicates=FALSE, deff=FALSE,...)
+{
+ if (!exists(".Generic",inherits=FALSE))
+ .Deprecated("svytotal")
+ if (!inherits(design,"svyrep.design")) stop("design is not a replicate survey design")
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,design$variables,na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ } else{
+ if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+ else {
+ if(is.data.frame(x) && any(sapply(x,is.factor))){
+ xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
+ cols<-sapply(xx,NCOL)
+ scols<-c(0,cumsum(cols))
+ cn<-character(sum(cols))
+ for(i in 1:length(xx))
+ cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-cn
+ }
+ }
+ }
+
+
+
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ x<-x[nas==0,,drop=FALSE]
+ }
+
+ wts<-design$repweights
+ scale<-design$scale
+ rscales<-design$rscales
+ if (!is.null(rho)) .NotYetUsed("rho")
+
+ if (!design$combined.weights)
+ pw<-design$pweights
+ else
+ pw<-1
+
+ rval<-colSums(design$pweights*x)
+
+ if (is.character(deff) || deff){
+ nobs<-length(design$pweights)
+ npop<-sum(design$pweights)
+ vsrs<-unclass(svyvar(x,design,na.rm=na.rm, return.replicates=FALSE,estimate.only=TRUE))*sum(design$pweights)^2/nobs
+ if (deff!="replace")
+ vsrs<-vsrs*(npop-nobs)/npop
+ }
+
+ if (getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep)){
+ v<-matrix(0,nrow=NROW(rval),ncol=NROW(rval))
+ repmeans<-NULL
+ } else {
+ if (inherits(wts, "repweights_compressed")){
+ if (getOption("survey.drop.replicates") && !is.null(design$selfrep)){
+ wts$index<-wts$index[!design$selfrep]
+ x<-x[!design$selfrep,,drop=FALSE]
+ pw<-pw[!design$selfrep]
+ }
+ repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts$weights))
+ for(i in 1:ncol(wts$weights)){
+ wi<-wts$weights[wts$index,i]
+ repmeans[i,]<-t(colSums(wi*x*pw))
+ }
+ } else {
+ if (getOption("survey.drop.replicates") && !is.null(design$selfrep)){
+ wts<-wts[!design$selfrep,,drop=FALSE]
+ x<-x[!design$selfrep,,drop=FALSE]
+ pw<-pw[!design$selfrep]
+ }
+ repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts))
+ for(i in 1:ncol(wts)){
+ repmeans[i,]<-t(colSums(wts[,i]*x*pw))
+ }
+ }
+ repmeans<-drop(repmeans)
+ v <- svrVar(repmeans, scale, rscales,mse=design$mse,coef=rval)
+ }
+attr(rval,"var") <- v
+attr(rval, "statistic")<-"total"
+if (return.replicates){
+ attr(repmeans,"scale")<-design$scale
+ attr(repmeans,"rscales")<-design$rscales
+ attr(repmeans,"mse")<-design$mse
+ rval<-list(mean=rval, replicates=repmeans)
+}
+
+if (is.character(deff) || deff)
+ attr(rval,"deff") <- v/vsrs
+class(rval)<-"svrepstat"
+rval
+}
+
+
+
+svycoxph.svyrep.design<-function(formula, design, subset=NULL,...,return.replicates=FALSE,na.action,
+ multicore=getOption("survey.multicore")){
+ subset<-substitute(subset)
+ subset<-eval(subset, design$variables, parent.frame())
+ if (!is.null(subset))
+ design<-design[subset,]
+ if (multicore && !requireNamespace("parallel",quietly=TRUE))
+ multicore<-FALSE
+
+ data<-design$variables
+
+
+ g<-match.call()
+ g$design<-NULL
+ g$return.replicates<-NULL
+ g$weights<-quote(.survey.prob.weights)
+ g[[1]]<-quote(coxph)
+ g$x<-TRUE
+
+ scale<-design$scale
+ rscales<-design$rscales
+
+ pwts<-design$pweights/sum(design$pweights)
+ if (is.data.frame(pwts)) pwts<-pwts[[1]]
+
+ if (!all(all.vars(formula) %in% names(data)))
+ stop("all variables must be in design= argument")
+ .survey.prob.weights<-pwts
+ full<-with(data,eval(g))
+
+ nas<-attr(full$model, "na.action")
+
+ betas<-matrix(ncol=length(coef(full)),nrow=ncol(design$repweights))
+
+ wts<-design$repweights
+
+ if (!design$combined.weights){
+ pw1<-pwts
+ rwt<-pw1/mean(pw1)
+ } else{
+ rwt<-1/mean(as.vector(wts[,1]))
+ pw1<-rwt
+ }
+
+ if (length(nas))
+ wts<-wts[-nas,]
+ beta0<-coef(full)
+
+ ## coxph doesn't allow zero weights
+ EPSILON<-1e-10
+
+ if(full$method %in% c("efron","breslow")){
+ if (attr(full$y,"type")=="right")
+ fitter<-coxph.fit
+ else if(attr(full$y,"type")=="counting")
+ fitter<-survival::agreg.fit
+ else stop("invalid survival type")
+ } else fitter<-survival::agexact.fit
+
+## g$init<-beta0
+## for(i in 1:ncol(wts)){
+## .survey.prob.weights<-as.vector(wts[,i])*pw1+EPSILON
+## betas[i,]<-with(data,coef(eval(g)))
+## }
+ if (multicore){
+ betas<-do.call(rbind, parallel::mclapply(1:ncol(wts), function(i){
+ fitter(full$x, full$y, full$strata, full$offset,
+ coef(full), coxph.control(),
+ as.vector(wts[,i])*pw1+EPSILON,
+ full$method, names(full$resid))$coef
+ }))
+ }else{
+ for(i in 1:ncol(wts)){
+ betas[i,]<-fitter(full$x, full$y, full$strata, full$offset,
+ coef(full), coxph.control(),
+ as.vector(wts[,i])*pw1+EPSILON,
+ full$method, names(full$resid))$coef
+
+ }
+ }
+
+ if (length(nas))
+ design<-design[-nas,]
+
+ v<-svrVar(betas,scale, rscales, mse=design$mse, coef=beta0)
+
+ full$var<-v
+ if (return.replicates){
+ attr(betas,"scale")<-design$scale
+ attr(betas,"rscales")<-design$rscales
+ attr(betas,"mse")<-design$mse
+ full$replicates<-betas
+ }
+ full$naive.var<-NULL
+ full$wald.test<-coef(full)%*%solve(full$var,coef(full))
+ full$loglik<-c(NA,NA)
+ full$rscore<-NULL
+ full$score<-NA
+ full$degf.residual<-degf(design)+1-length(coef(full)[!is.na(coef(full))])
+
+ class(full)<-c("svrepcoxph","svycoxph",class(full))
+ full$call<-match.call()
+ full$printcall<-sys.call(-1)
+ full$survey.design<-design
+
+ full
+}
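+
+## Usage sketch, assuming a replicate-weight design 'rdes' whose
+## variables include time, status and x:
+##   fit <- svycoxph(Surv(time, status) ~ x, design = rdes)
+## Each replicate refit starts from the full-sample estimate, and zero
+## replicate weights are shifted by EPSILON because coxph does not
+## allow exact zeros.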
+
+svrepglm<-svyglm.svyrep.design<-function(formula, design, subset=NULL, ...,
+ rho=NULL, return.replicates=FALSE, na.action,
+ multicore=getOption("survey.multicore")){
+
+ if (!exists(".Generic",inherits=FALSE))
+ .Deprecated("svyglm")
+
+ subset<-substitute(subset)
+ subset<-eval(subset, design$variables, parent.frame())
+ if (!is.null(subset))
+ design<-design[subset,]
+
+ if(multicore && !requireNamespace("parallel",quietly=TRUE))
+ multicore<-FALSE
+
+ data<-design$variables
+
+
+ g<-match.call()
+ formula<-eval.parent(formula)
+ environment(formula)<-environment()
+ g$formula<-formula
+ g$data<-quote(data)
+ g$design<-NULL
+ g$var<-g$rho<-g$return.replicates<-g$multicore<-NULL
+ g$weights<-quote(.survey.prob.weights)
+ g[[1]]<-quote(glm)
+ g$model<-TRUE
+ g$x<-TRUE
+ g$y<-TRUE
+
+ scale<-design$scale
+ rscales<-design$rscales
+ if (!is.null(rho)) .NotYetUsed("rho")
+
+ pwts<-design$pweights/sum(design$pweights)
+ if (is.data.frame(pwts)) pwts<-pwts[[1]]
+
+ if (!all(all.vars(formula) %in% names(data)))
+ stop("all variables must be in design= argument")
+ .survey.prob.weights<-pwts
+ full<-with(data,eval(g))
+
+ full$naive.cov<-summary(full)$cov.unscaled
+
+ nas<-attr(full$model, "na.action")
+
+ if(getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep)){
+
+ v<-matrix(0,ncol=length(coef(full)),nrow=length(coef(full)))
+ betas<-NULL
+
+ } else {
+ betas<-matrix(ncol=length(coef(full)),
+ nrow=ncol(design$repweights))
+
+ if (!design$combined.weights)
+ pw1<-pwts
+ else
+ pw1<-rep(1,length(pwts))
+ wts<-design$repweights
+ if (length(nas)){
+ wts<-wts[-nas,]
+ pw1<-pw1[-nas]
+ }
+ XX<-full$x
+ YY<-full$y
+ beta0<-coef(full)
+ if (!all(is.finite(beta0))) stop(paste("Infinite/NA values in estimate (",paste(beta0,collapse=","),")"))
+ if(is.null(full$offset))
+ offs<-rep(0,nrow(XX))
+ else
+ offs<-full$offset
+ incpt<-as.logical(attr(terms(full),"intercept"))
+ fam<-full$family
+ contrl<-full$control
+ if (multicore){
+ betas<-do.call(rbind,parallel::mclapply(1:ncol(wts), function(i){
+ wi<-as.vector(wts[,i])*pw1
+ glm.fit(XX, YY, weights = wi/sum(wi),
+ start =beta0,
+ offset = offs,
+ family = fam, control = contrl,
+ intercept = incpt)$coefficients
+
+ }))
+ } else {
+ for(i in 1:ncol(wts)){
+ wi<-as.vector(wts[,i])*pw1
+ betas[i,]<-glm.fit(XX, YY, weights = wi/sum(wi),
+ start =beta0,
+ offset = offs,
+ family = fam, control = contrl,
+ intercept = incpt)$coefficients
+ }
+ }
+ v<-svrVar(betas,scale, rscales,mse=design$mse,coef=beta0)
+ }
+
+ full$x<-NULL
+ full$df.residual<-degf(design)+1-length(coef(full)[!is.na(coef(full))])
+
+ if (length(nas))
+ design<-design[-nas,]
+
+ full$cov.unscaled<-v
+ if (return.replicates){
+ attr(betas,"scale")<-design$scale
+ attr(betas,"rscales")<-design$rscales
+ attr(betas,"mse")<-design$mse
+ full$replicates<-betas
+ }
+ class(full)<-c("svrepglm", "svyglm", class(full))
+ full$call<-sys.call(-1)
+ if(!("formula" %in% names(full$call))) {
+ if (is.null(names(full$call)))
+ i<-1
+ else
+ i<-min(which(names(full$call)[-1]==""))
+ names(full$call)[i+1]<-"formula"
+ }
+ full$survey.design<-design
+ full
+}
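+
+## Usage sketch (rclus1 as above):
+##   fit <- svyglm(api00 ~ ell + meals, design = rclus1)
+##   summary(fit)   ## t-based inference with replication-based residual df
+## Each replicate refits via glm.fit() starting from the full-sample
+## coefficients, and svrVar() combines the replicate coefficients.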
+
+
+print.summary.svyglm<-function (x, digits = max(3, getOption("digits") - 3),
+ symbolic.cor = x$symbolic.cor,
+ signif.stars = getOption("show.signif.stars"), ...)
+{
+ ##if (!exists("printCoefmat")) printCoefmat<-print.coefmat
+
+ cat("\nCall:\n")
+ cat(paste(deparse(x$call), sep = "\n", collapse = "\n"),
+ "\n\n", sep = "")
+
+ cat("Survey design:\n")
+ print(x$survey.design$call)
+
+ if (!is.null(df <- x$df) && (nsingular <- df[3] - df[1]))
+ cat("\nCoefficients: (", nsingular, " not defined because of singularities)\n",
+ sep = "")
+ else cat("\nCoefficients:\n")
+ coefs <- x$coefficients
+ if (!is.null(aliased <- is.na(x$coefficients[,1])) && any(aliased)) {
+ cn <- names(aliased)
+ coefs <- matrix(NA, length(aliased), 4, dimnames = list(cn,
+ colnames(coefs)))
+ coefs[!aliased, ] <- x$coefficients
+ }
+ printCoefmat(coefs, digits = digits, signif.stars = signif.stars,
+ na.print = "NA", ...)
+
+ cat("\n(Dispersion parameter for ", x$family$family, " family taken to be ",
+ format(x$dispersion), ")\n\n", "Number of Fisher Scoring iterations: ",
+ x$iter, "\n", sep = "")
+ correl <- x$correlation
+ if (!is.null(correl)) {
+ p <- NCOL(correl)
+ if (p > 1) {
+ cat("\nCorrelation of Coefficients:\n")
+ if (is.logical(symbolic.cor) && symbolic.cor) {
+ print(symnum(correl, abbr.colnames = NULL))
+ }
+ else {
+ correl <- format(round(correl, 2), nsmall = 2,
+ digits = digits)
+ correl[!lower.tri(correl)] <- ""
+ print(correl[-1, -p, drop = FALSE], quote = FALSE)
+ }
+ }
+ }
+ cat("\n")
+ invisible(x)
+}
+
+
+
+predict.svrepglm <- function(object, newdata=NULL, total=NULL,
+ type = c("link", "response","terms"),
+ se.fit=(type!="terms"),
+ vcov=FALSE, return.replicates=!is.null(object$replicates),...){
+ if(is.null(newdata))
+ newdata<-model.frame(object$survey.design)
+ type<-match.arg(type)
+ if (type=="terms")
+ return(predterms(object,se=se.fit,...))
+ tt<-delete.response(terms(formula(object)))
+ mf<-model.frame(tt,data=newdata)
+ mm<-model.matrix(tt,mf)
+ if (!is.null(total) && attr(tt,"intercept")){
+ mm[,attr(tt,"intercept")]<-mm[,attr(tt,"intercept")]*total
+ }
+ eta<-drop(mm %*% coef(object))
+ d<-drop(object$family$mu.eta(eta))
+ eta<-switch(type, link=eta, response=object$family$linkinv(eta))
+ if(se.fit){
+ if(vcov){
+ vv<-mm %*% vcov(object) %*% t(mm)
+ attr(eta,"var")<-switch(type,
+ link=vv,
+ response=d*(t(vv*d)))
+ } else {
+ ## FIXME make this more efficient
+ vv<-drop(rowSums((mm %*% vcov(object)) * mm))
+ attr(eta,"var")<-switch(type,
+ link=vv,
+ response=drop(d*(t(vv*d))))
+ }
+ }
+ attr(eta,"statistic")<-type
+
+ if (return.replicates){
+ if (is.null(object$replicates)) {
+ warning("replicates are not present in the fit")
+ } else{
+ pred.replicates<-t(apply(object$replicates,1, function(beta){
+ etai<-drop(mm %*% beta)
+ switch(type, link=etai, response=object$family$linkinv(etai))
+ }))
+ attr(pred.replicates,"scale")<-attr(object$replicates,"scale")
+ attr(pred.replicates,"rscales")<-attr(object$replicates,"rscales")
+ attr(pred.replicates,"mse")<-attr(object$replicates,"mse")
+ eta<-list(eta,replicates=pred.replicates)
+ }
+ }
+
+ class(eta)<-"svrepstat"
+ eta
+ }
+
+
+
+
+svyratio.svyrep.design<-svrepratio<-function(numerator=formula,denominator, design,
+ na.rm=FALSE,formula,covmat=FALSE,
+ return.replicates=FALSE,deff=FALSE,...){
+
+ if (!exists(".Generic"))
+ .Deprecated("svyratio")
+
+ if (!inherits(design, "svyrep.design")) stop("design must be a svyrep.design object")
+
+ if (inherits(numerator,"formula"))
+ numerator<-model.frame(numerator,design$variables, na.action=na.pass)
+ else if(typeof(numerator) %in% c("expression","symbol"))
+ numerator<-eval(numerator, design$variables)
+ if (inherits(denominator,"formula"))
+ denominator<-model.frame(denominator,design$variables, na.action=na.pass)
+ else if(typeof(denominator) %in% c("expression","symbol"))
+ denominator<-eval(denominator, design$variables)
+
+ nn<-NCOL(numerator)
+ nd<-NCOL(denominator)
+
+ all<-cbind(numerator,denominator)
+ nas<-!complete.cases(all)
+ if (na.rm==TRUE){
+ design<-design[!nas,]
+ all<-all[!nas,,drop=FALSE]
+ numerator<-numerator[!nas,,drop=FALSE]
+ denominator<-denominator[!nas,,drop=FALSE]
+ }
+ allstats<-svymean(all, design, return.replicates=TRUE)
+
+ rval<-list(ratio=outer(allstats$mean[1:nn], allstats$mean[nn+1:nd], "/"))
+
+ if (is.null(allstats$replicates)){
+ ##only self-representing strata.
+ vars<-matrix(0,nrow=nn,ncol=nd)
+ }else {
+ vars<-matrix(nrow=nn,ncol=nd)
+ if (deff) deffs<-matrix(nrow=nn,ncol=nd)
+ for(i in 1:nn){
+ for(j in 1:nd){
+ vars[i,j]<-svrVar(allstats$replicates[,i]/allstats$replicates[,nn+j],
+ design$scale, design$rscales,mse=design$mse,coef=rval$ratio[i,j])
+ if (deff)
+ deffs[i,j]<-deff(svytotal(numerator[,i]-rval$ratio[i,j]*denominator[,j],design,deff=TRUE))
+ }
+ }
+ }
+ if (covmat){
+ if (is.null(allstats$replicates))
+ vcovmat<-matrix(0,nn*nd,nn*nd)
+ else
+ vcovmat<-as.matrix(svrVar(allstats$replicates[,rep(1:nn,nd)]/allstats$replicates[,nn+rep(1:nd,each=nn)],
+ design$scale, design$rscales,mse=design$mse,coef=as.vector(rval$ratio)))
+ rownames(vcovmat)<-names(numerator)[rep(1:nn,nd)]
+ colnames(vcovmat)<-names(denominator)[rep(1:nd,each=nn)]
+ rval$vcov<-vcovmat
+ }
+ if (return.replicates) {
+ reps<-allstats$replicates[, rep(1:nn, nd)]/allstats$replicates[, nn + rep(1:nd, each = nn)]
+ attr(reps,"scale")<-design$scale
+ attr(reps,"rscales")<-design$rscales
+ attr(reps,"mse")<-design$mse
+ rval$replicates<-reps
+ }
+ rval$var<-vars
+ attr(rval,"call")<-sys.call()
+ if (deff) attr(rval,"deff")<-deffs
+ class(rval)<-"svyratio"
+ rval
+}
+
+vcov.svyratio <- function(object, ...){
+ covmat<-object$vcov
+ if (is.null(covmat)){
+ covmat<-matrix(NaN,length(object$var),length(object$var))
+ diag(covmat)<-as.vector(object$var)
+ }
+ nms<-as.vector(outer(rownames(object$ratio),colnames(object$ratio),paste,sep="/"))
+ dimnames(covmat)<-list(nms,nms)
+ covmat
+}
+
+residuals.svrepglm<-function(object,type = c("deviance", "pearson", "working",
+ "response", "partial"),...){
+ type<-match.arg(type)
+ if (type=="pearson"){
+ y <- object$y
+ mu <- object$fitted.values
+ wts <- object$prior.weights
+ r<-(y - mu) * sqrt(wts)/(sqrt(object$family$variance(mu))*sqrt(object$survey.design$pweights/sum(object$survey.design$pweights)))
+ if (is.null(object$na.action))
+ r
+ else
+ naresid(object$na.action, r)
+ } else
+ NextMethod()
+
+}
+
+logLik.svrepglm<-function(object,...){
+ stop("svrepglm not fitted by maximum likelihood.")
+}
+
+
+withReplicates<-function(design, theta, ..., return.replicates=FALSE){
+ UseMethod("withReplicates",design)
+}
+
+withReplicates.svrepvar<-function(design, theta, ...,return.replicates=FALSE){
+ if (is.null(reps<-design$replicates)) stop("object does not contain replicate estimates")
+
+ p<-sqrt(NCOL(reps))
+ if (is.function(theta)){
+ full<-theta(design[[1]],...)
+ thetas<-drop(t(apply(reps,1,
+ function(rr) theta(matrix(rr,p,p), ...))))
+ } else{
+ full<-eval(theta, list(.replicate=design[[1]]))
+ thetas<-drop(t(apply(reps,1,
+ function(rr) eval(theta, list(.replicate=matrix(rr,p,p))))))
+ }
+
+ v<-svrVar(thetas, attr(reps,"scale"), attr(reps,"rscales"), mse=attr(reps,"mse"), coef=full)
+
+ attr(full,"var")<-v
+ attr(full,"statistic")<-"theta"
+
+ if (return.replicates){
+ attr(thetas,"scale")<-attr(reps,"scale")
+ attr(thetas,"rscales")<-attr(reps,"rscales")
+ attr(thetas,"mse")<-attr(reps,"mse")
+ rval<-list(theta=full, replicates=thetas)
+ } else {
+ rval<-full
+ }
+ class(rval)<-"svrepstat"
+ rval
+
+
+}
+
+withReplicates.svrepstat<-function(design, theta, ..., return.replicates=FALSE){
+ if (is.null(reps<-design$replicates)) stop("object does not contain replicate estimates")
+
+ reps<-as.matrix(reps)
+
+ if (is.function(theta)){
+ full<-theta(design[[1]],...)
+ thetas<-drop(t(apply(reps,1,theta, ...)))
+ } else{
+ full<-eval(theta, list(.replicate=design[[1]]))
+ thetas<-drop(t(apply(reps,1,
+ function(rr) eval(theta, list(.replicate=rr)))))
+ }
+
+ v<-svrVar(thetas, attr(reps,"scale"), attr(reps,"rscales"), mse=attr(reps,"mse"), coef=full)
+
+ attr(full,"var")<-v
+ attr(full,"statistic")<-"theta"
+
+ if (return.replicates){
+ attr(thetas,"scale")<-attr(reps,"scale")
+ attr(thetas,"rscales")<-attr(reps,"rscales")
+ attr(thetas,"mse")<-attr(reps,"mse")
+ rval<-list(theta=full, replicates=thetas)
+ } else {
+ rval<-full
+ }
+ class(rval)<-"svrepstat"
+ rval
+}
+
+
+withReplicates.svyrep.design<-function(design, theta, rho=NULL,...,
+ scale.weights=FALSE,
+ return.replicates=FALSE){
+ wts<-design$repweights
+ scale<-design$scale
+ rscales<-design$rscales
+ if (!is.null(rho)) .NotYetUsed("rho")
+
+ if (scale.weights)
+ pwts<-design$pweights/sum(design$pweights)
+ else
+ pwts<-design$pweights
+
+ if (inherits(wts,"repweights_compressed")){
+ if (scale.weights)
+ wts$weights<-sweep(wts$weights,2,drop(colSums(wts$weights)),"/")
+ } else {
+ if (scale.weights)
+ wts<-sweep(wts,2, drop(colSums(wts)),"/")
+ }
+
+ rpwts<-if (design$combined.weights) 1 else pwts
+ data<-design$variables
+
+ if (is.function(theta)){
+ full<-theta(pwts,data,...)
+ thetas<-drop(t(apply(wts,2,
+ function(ww) theta(as.vector(ww)*rpwts, data, ...))))
+ } else{
+ .weights<-pwts
+ full<-with(data, eval(theta))
+ thetas<-drop(t(apply(wts,2,
+ function(.weights) {.weights<-as.vector(.weights)*rpwts
+ with(data, eval(theta))})))
+ }
+
+ v<-svrVar(thetas, scale, rscales,mse=design$mse, coef=full)
+
+ attr(full,"var")<-v
+ attr(full,"statistic")<-"theta"
+
+ if (return.replicates)
+ rval<-list(theta=full, replicates=thetas)
+ else
+ rval<-full
+ class(rval)<-"svrepstat"
+ rval
+ }
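+
+## Usage sketch (rclus1 as above). theta may be a function of
+## (weights, data) or an expression using the reserved name .weights:
+##   withReplicates(rclus1, quote(sum(.weights*api00)/sum(.weights)))
+##   withReplicates(rclus1,
+##       function(w, data) coef(lm(api00 ~ ell, data = data, weights = w)))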
+
+coef.svrepstat<-function(object,...){
+ if (is.list(object)) object<-object[[1]]
+ attr(object,"statistic")<-NULL
+ attr(object,"deff")<-NULL
+ attr(object,"var")<-NULL
+ unclass(object)
+}
+
+vcov.svrepstat<-function (object, ...)
+{
+ nms <- names(coef(object))
+ if (is.list(object))
+ object <- object[[1]]
+ v <- as.matrix(attr(object, "var"))
+
+ if (length(object) == NCOL(v)) {
+ dimnames(v) <- list(nms, nms)
+ v
+ }
+ else if (length(object) == length(v)) {
+ dnms <- dimnames(coef(object))
+ vmat <- matrix(NA, nrow = length(object), ncol = length(object))
+ diag(vmat) <- as.vector(v)
+ nms <- as.vector(outer(dnms[[1]], dnms[[2]], paste, sep = ":"))
+ dimnames(vmat) <- list(nms, nms)
+ vmat
+ }
+}
+
+
+
+as.data.frame.svrepstat<-function(x,...){
+ if (is.list(x)) {
+ x<-x[[1]]
+ class(x)<-"svrepstat"
+ }
+ rval<-data.frame(statistic=coef(x),SE=SE(x))
+ names(rval)[1]<-attr(x,"statistic")
+ if (!is.null(attr(x,"deff")))
+ rval<-cbind(rval,deff=deff(x))
+ rval
+}
+
+SE<-function(object,...){
+ UseMethod("SE")
+}
+
+SE.default<-function(object,...){
+ sqrt(diag(vcov(object,...)))
+}
+
+SE.svrepstat<-function(object,...){
+ if (is.list(object)){
+ object<-object[[1]]
+ }
+ vv<-as.matrix(attr(object,"var"))
+ if (!is.null(dim(object)) && length(object)==length(vv))
+ sqrt(vv)
+ else
+ sqrt(diag(vv))
+}
+
+print.svrepstat<-function(x,...){
+ if (is.list(x)){
+ x<-x[[1]]
+ }
+ vv<-attr(x,"var")
+ deff<-attr(x, "deff")
+ if (!is.null(dim(x)) && length(x)==length(vv)){
+ cat("Statistic:\n")
+ prmatrix(x)
+ cat("SE:\n")
+ print(sqrt(vv))
+ if (!is.null(deff)){
+ cat("Design Effect:\n")
+ print(deff)
+ }
+ } else if(length(x)==NCOL(vv)){
+ m<-cbind(x,sqrt(diag(as.matrix(vv))))
+ if (is.null(deff))
+ colnames(m)<-c(attr(x,"statistic"),"SE")
+ else {
+ m<-cbind(m,deff(x))
+ colnames(m)<-c(attr(x,"statistic"),"SE","DEff")
+ }
+ printCoefmat(m)
+ } else {stop("incorrect structure of svrepstat object")}
+
+ naa<-attr(vv,"na.replicates")
+ if (!is.null(naa))
+ cat("Note: NA results discarded for",length(naa),"replicates (",naa,")\n")
+}
+
+summary.svrepglm<-function (object, correlation = FALSE, df.resid=NULL,...)
+{
+ Qr <- object$qr
+ est.disp <- TRUE
+ if (is.null(df.resid))
+ df.r <- object$df.residual
+ else
+ df.r<-df.resid
+ presid<-resid(object,"pearson")
+ dispersion<- sum( object$survey.design$pweights*presid^2,na.rm=TRUE)/sum(object$survey.design$pweights)
+ coef.p <- coef(object)
+ covmat<-vcov(object)
+ dimnames(covmat) <- list(names(coef.p), names(coef.p))
+ var.cf <- diag(covmat)
+ s.err <- sqrt(var.cf)
+ tvalue <- coef.p/s.err
+ dn <- c("Estimate", "Std. Error")
+ if (!est.disp) {
+ pvalue <- 2 * pnorm(-abs(tvalue))
+ coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
+ dimnames(coef.table) <- list(names(coef.p), c(dn, "z value",
+ "Pr(>|z|)"))
+ }
+ else if (df.r > 0) {
+ pvalue <- 2 * pt(-abs(tvalue), df.r)
+ coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
+ dimnames(coef.table) <- list(names(coef.p), c(dn, "t value",
+ "Pr(>|t|)"))
+ }
+ else {
+ coef.table <- cbind(coef.p, Inf)
+ dimnames(coef.table) <- list(names(coef.p), dn)
+ }
+ ans <- c(object[c("call", "terms", "family", "deviance",
+ "aic", "contrasts", "df.residual", "null.deviance", "df.null",
+ "iter")], list(deviance.resid = residuals(object, type = "deviance"),
+ aic = object$aic, coefficients = coef.table, dispersion = dispersion,
+ df = c(object$rank, df.r,NCOL(Qr$qr)), cov.unscaled = covmat,
+ cov.scaled = covmat))
+ if (correlation) {
+ dd <- sqrt(diag(covmat))
+ ans$correlation <- covmat/outer(dd, dd)
+ }
+
+ ans$aliased<-is.na(ans$coef)
+ ans$survey.design<-list(call=object$survey.design$call,
+ type=object$survey.design$type)
+ class(ans) <- c("summary.svyglm","summary.glm")
+ return(ans)
+}
+
+
+svytable.svyrep.design<-svreptable<-function(formula, design,
+ Ntotal=sum(weights(design, "sampling")),
+ round=FALSE,...){
+
+ if (!exists(".Generic",inherits=FALSE))
+ .Deprecated("svytable")
+
+ weights<-design$pweights
+ if (is.data.frame(weights)) weights<-weights[[1]]
+ ## unstratified or unadjusted.
+ if (is.null(Ntotal) || length(Ntotal)==1){
+ ff<-eval(substitute(lhs~rhs,list(lhs=quote(weights), rhs=formula[[2]])))
+ tbl<-xtabs(ff, data=design$variables,...)
+ if (!is.null(Ntotal)) {
+ tbl<-tbl*sum(Ntotal)/sum(tbl)
+ }
+ if (round)
+ tbl<-round(tbl)
+ class(tbl) <- c("svytable", class(tbl))
+ attr(tbl, "call")<-match.call()
+ return(tbl)
+ }
+ ## adjusted and stratified
+ ff<-eval(substitute(lhs~strata+rhs,list(lhs=quote(weights),
+ rhs=formula[[2]],
+ strata=quote(design$strata))))
+ tbl<-xtabs(ff, data=design$variables,...)
+ ss<-match(sort(unique(design$strata)), Ntotal[,1])
+ dm<-dim(tbl)
+ layer<-prod(dm[-1])
+ tbl<-sweep(tbl,1,Ntotal[ss, 2]/apply(tbl,1,sum),"*")
+ tbl<-apply(tbl, 2:length(dm), sum)
+ if (round)
+ tbl<-round(tbl)
+ class(tbl)<-c("svytable", "xtabs","table")
+ attr(tbl, "call")<-match.call()
+
+ tbl
+}
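+
+## Usage sketch (rclus1 as above):
+##   svytable(~stype, rclus1)              ## estimated population counts
+##   svytable(~stype, rclus1, Ntotal=100)  ## rescaled to percentages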
+
+
+postStratify<-function(design,strata, population, partial=FALSE,...){
+ UseMethod("postStratify")
+}
+
+
+
+postStratify.svyrep.design<-function(design, strata, population,
+ partial=FALSE,compress=NULL,...){
+
+ if(inherits(strata,"formula")){
+ mf<-substitute(model.frame(strata, data=design$variables,na.action=na.fail))
+ strata<-eval.parent(mf)
+ }
+ strata<-as.data.frame(strata)
+ if (is.null(compress))
+ compress<-inherits(design$repweights, "repweights_compressed")
+
+ sampletable<-xtabs(design$pweights~.,data=strata)
+ sampletable<-as.data.frame(sampletable)
+
+ if (inherits(population,"table"))
+ population<-as.data.frame(population)
+ else if (is.data.frame(population))
+ population$Freq <- as.vector(population$Freq)
+ else
+ stop("population must be a table or dataframe")
+
+ if (!all(names(strata) %in% names(population)))
+ stop("Stratifying variables don't match")
+ nn<- names(population) %in% names(strata)
+ if (sum(!nn)!=1)
+ stop("stratifying variables don't match")
+
+ names(population)[which(!nn)]<-"Pop.Freq"
+
+ both<-merge(sampletable, population, by=names(strata), all=TRUE)
+
+ samplezero <- both$Freq %in% c(0, NA)
+ popzero <- both$Pop.Freq %in% c(0, NA)
+ both<-both[!(samplezero & popzero),]
+
+ if (any(onlysample<- popzero & !samplezero)){
+ print(both[onlysample,])
+ stop("Strata in sample absent from population. This Can't Happen")
+ }
+ if (any(onlypop <- samplezero & !popzero)){
+ if (partial){
+ both<-both[!onlypop,]
+ warning("Some strata absent from sample: ignored")
+ } else {
+ print(both[onlypop,])
+ stop("Some strata absent from sample: use partial=TRUE to ignore them.")
+ }
+ }
+
+ reweight<-both$Pop.Freq/both$Freq
+ both$label <- do.call("interaction", list(both[,names(strata)]))
+ designlabel <- do.call("interaction", strata)
+ index<-match(designlabel, both$label)
+
+ oldpw<-design$pweights
+ design$pweights<-design$pweights*reweight[index]
+
+
+ if (design$combined.weights){
+ replicateFreq<- rowsum(as.matrix(design$repweights),
+ match(designlabel, both$label),
+ reorder=TRUE)
+ repreweight<- both$Pop.Freq/replicateFreq
+ design$repweights <- as.matrix(design$repweights)*repreweight[index]
+ } else {
+ replicateFreq<- rowsum(as.matrix(design$repweights)*oldpw,
+ match(designlabel, both$label),
+ reorder=TRUE)
+ repreweight<- both$Pop.Freq/replicateFreq
+ design$repweights <- as.matrix(design$repweights)* (repreweight/reweight)[index,]
+ }
+
+ if (compress) design$repweights<-compressWeights(design$repweights)
+
+ design$call<-sys.call(-1)
+ if(!is.null(design$degf)){
+ design$degf<-NULL
+ design$degf<-degf(design)
+ }
+ design
+}
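+
+## Usage sketch (rclus1 as above), post-stratifying on school type with
+## illustrative api population counts:
+##   pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
+##   rclus1p <- postStratify(rclus1, ~stype, pop.types)
+##   svymean(~api00, rclus1p)
+## Both the sampling weights and every column of replicate weights are
+## reweighted, so each replicate is itself post-stratified.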
+
+
+rake<-function(design, sample.margins, population.margins,
+ control=list(maxit=10, epsilon=1, verbose=FALSE),
+ compress=NULL){
+
+ if (!missing(control)){
+ control.defaults<-formals(rake)$control
+ for(n in names(control.defaults))
+ if(!(n %in% names(control)))
+ control[[n]]<-control.defaults[[n]]
+ }
+
+ is.rep<-inherits(design,"svyrep.design")
+
+ if (is.rep && is.null(compress))
+ compress<-inherits(design$repweights,"repweights_compressed")
+
+ if (is.rep) design$degf<-NULL
+
+ if (length(sample.margins)!=length(population.margins))
+ stop("sample.margins and population.margins do not match.")
+
+ nmar<-length(sample.margins)
+
+ if (control$epsilon<1)
+ epsilon<-control$epsilon*sum(weights(design,"sampling"))
+ else
+ epsilon<-control$epsilon
+
+
+
+ strata<-lapply(sample.margins, function(margin)
+ if(inherits(margin,"formula"))
+ model.frame(margin, data=design$variables,na.action=na.fail)
+ )
+
+
+ allterms<-unlist(lapply(sample.margins,all.vars))
+ ff<-formula(paste("~", paste(allterms,collapse="+"),sep=""))
+ oldtable<-svytable(ff, design)
+ if (control$verbose)
+ print(oldtable)
+
+ oldpoststrata<-design$postStrata
+ iter<-0
+ converged<-FALSE
+ while(iter < control$maxit){
+ ## we don't want to accumulate more poststrata with each iteration
+ design$postStrata<-NULL
+
+ for(i in 1:nmar){
+ design<-postStratify(design, strata[[i]],
+ population.margins[[i]],
+ compress=FALSE)
+ }
+ newtable<-svytable(ff, design)
+ if (control$verbose)
+ print(newtable)
+
+ delta<-max(abs(oldtable-newtable))
+ if (delta<epsilon){
+ converged<-TRUE
+ break
+ }
+ oldtable<-newtable
+ iter<-iter+1
+ }
+
+ ## changed in 3.6-3 to allow the projections to be iterated
+ ## in svyrecvar
+ rakestrata<-design$postStrata
+ if(!is.null(rakestrata)){
+ class(rakestrata)<-"raking"
+ design$postStrata<-c(oldpoststrata, list(rakestrata))
+ }
+
+ design$call<-sys.call()
+
+ if (is.rep && compress)
+ design$repweights<-compressWeights(design$repweights)
+ if(is.rep)
+ design$degf<-degf(design)
+
+ if(!converged)
+ warning("Raking did not converge after ", iter, " iterations.\n")
+
+ return(design)
+
+}
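+
+## Usage sketch (rclus1 and pop.types as above), raking on two margins;
+## the sch.wide totals are illustrative:
+##   pop.schwide <- data.frame(sch.wide=c("No","Yes"), Freq=c(1072,5122))
+##   rclus1r <- rake(rclus1, list(~stype, ~sch.wide),
+##                   list(pop.types, pop.schwide))
+##   svymean(~api00, rclus1r)
+## rake() cycles postStratify() over the margins until the fitted
+## marginal tables change by less than epsilon, or maxit is reached.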
+
+
+
+
+## degrees of freedom for repweights design
+degf<-function(design,...) UseMethod("degf")
+
+degf.svyrep.design<-function(design,tol=1e-5,...){
+ if (!inherits(design,"svyrep.design"))
+ stop("Not a survey design with replicate weights")
+ rval<-design$degf ##cached version
+ if(is.null(rval))
+ rval<-qr(weights(design,"analysis"), tol=tol)$rank-1
+ rval
+}
+
+degf.survey.design2<-function(design,...){
+ inset<- weights(design,"sampling")!=0
+ length(unique(design$cluster[inset, 1])) - length(unique(design$strata[inset, 1]))
+}
+
+degf.twophase<-function(design,...){
+ degf(design$phase2)
+}
+
+dim.svyrep.design<-function(x) dim(x$variables)
diff --git a/R/svycdf.R b/R/svycdf.R
new file mode 100644
index 0000000..ddc641a
--- /dev/null
+++ b/R/svycdf.R
@@ -0,0 +1,50 @@
+svycdf<-function(formula,design,na.rm=TRUE,...) UseMethod("svycdf",design)
+
+svycdf.default<-function(formula, design,na.rm=TRUE,...){
+ if (inherits(formula, "formula"))
+ x <- model.frame(formula, model.frame(design), na.action = na.pass)
+ else if (typeof(formula) %in% c("expression", "symbol"))
+ x <- eval(formula, model.frame(design, na.action = na.pass))
+ else x<-formula
+ nas <- rowSums(is.na(x))
+ if (na.rm) {
+ x <- x[nas == 0, , drop = FALSE]
+ }
+ rval<-vector("list",ncol(x))
+ names(rval)<-names(x)
+ for(i in 1:ncol(x)){
+ xx<-x[,i]
+ w <- weights(design,type="sampling")
+ if (na.rm) w <- w[nas==0]
+ oo<-order(xx)
+ cum.w<-cumsum(w[oo])/sum(w)
+ cdf <- approxfun( xx[oo],cum.w, method = "constant",
+ yleft =0, yright =1,ties="max")
+
+ class(cdf)<-"stepfun"
+ call.i<-match.call()
+ call.i$formula<-as.formula(paste("~",names(x)[i]))
+ attr(cdf,"call")<-call.i
+ rval[[names(x)[i]]]<-cdf
+ }
+ class(rval)<-"svycdf"
+ cc<-sys.call()
+ cc[[1]]<-as.name(.Generic)
+ attr(rval,"call")<-cc
+ rval
+}
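+
+## Usage sketch, assuming a design 'dclus1' on the api data:
+##   cdfs <- svycdf(~enroll + api00, dclus1)
+##   cdfs[[1]](500)   ## estimated P(enroll <= 500)
+##   plot(cdfs)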
+
+
+print.svycdf<-function(x,...){
+ cat("Weighted ECDFs: ")
+ print(attr(x,"call"))
+ invisible(x)
+ }
+
+plot.svycdf<-function(x,xlab=NULL,...){
+ if(is.null(xlab))
+ xlab<-names(x)
+ else if (length(xlab)==1)
+ xlab<-rep(xlab,length(names(x)))
+
+ for (i in 1:length(x)) plot(x[[i]], xlab =xlab[i], ...)
+}
diff --git a/R/svyhist.R b/R/svyhist.R
new file mode 100644
index 0000000..f3e1efe
--- /dev/null
+++ b/R/svyhist.R
@@ -0,0 +1,22 @@
+svyhist<-function(formula, design, breaks = "Sturges",
+ include.lowest = TRUE, right = TRUE, xlab=NULL,
+ main=NULL, probability=TRUE,
+ freq=!probability,...){
+ if (inherits(design,"DBIsvydesign") || inherits(design,"ODBCsvydesign")){
+ design$variables<-getvars(formula, design$db$connection, design$db$tablename,
+ updates = design$updates)
+ class(design)<-"survey.design2"
+ }
+ mf<-model.frame(formula,model.frame(design), na.action=na.pass)
+ if (ncol(mf)>1) stop("Only one variable allowed.")
+ variable<-mf[,1]
+ varname<-names(mf)
+ h <- hist(variable, plot=FALSE, breaks=breaks,right=right)
+ props <- coef(svymean(~cut(variable, h$breaks,right=right, include.lowest=include.lowest),
+ design, na.rm=TRUE))
+ h$density<-props/diff(h$breaks)
+ h$counts <- props*sum(weights(design,"sampling"))
+ if (is.null(xlab)) xlab<-varname
+ if (is.null(main)) main<-paste("Histogram of",varname)
+ plot(h, ..., freq=freq,xlab=xlab,main=main)
+}
diff --git a/R/svykappa.R b/R/svykappa.R
new file mode 100644
index 0000000..6982d94
--- /dev/null
+++ b/R/svykappa.R
@@ -0,0 +1,32 @@
+
+svykappa<-function(formula, design,...) UseMethod("svykappa",design)
+
+svykappa.default<-function(formula, design,...) {
+ if (ncol(attr(terms(formula), "factors")) != 2)
+ stop("kappa is only computed for two variables")
+ rows <- formula[[2]][[2]]
+ cols <- formula[[2]][[3]]
+ df <- model.frame(design)
+ nrow <- length(unique(df[[as.character(rows)]]))
+ ncol <- length(unique(df[[as.character(cols)]]))
+ rnames<-paste(".",letters,"_",sep="")
+ cnames<-paste(".",LETTERS,"_",sep="")
+ if (nrow != ncol)
+ stop("number of categories is different")
+ probs <- eval(bquote(svymean(~.(rows) + .(cols) + interaction(.(rows),
+ .(cols)), design)))
+ nms <- c(rnames[1:nrow], cnames[1:ncol], outer(1:nrow,
+ 1:ncol, function(i, j) paste(rnames[i], cnames[j],
+ sep = ".")))
+ names(probs) <- nms
+ v <- attr(probs, "var")
+ dimnames(v) <- list(nms, nms)
+ attr(probs, "var") <- v
+ obs <- parse(text = paste(nms[nrow + ncol + 1+ (0:(nrow-1))*(ncol+1)],
+ collapse = "+"))[[1]]
+ expect <- parse(text = paste(nms[1:nrow], nms[nrow + 1:ncol],
+ sep = "*", collapse = "+"))[[1]]
+ svycontrast(probs, list(kappa = bquote((.(obs) - .(expect))/(1 -
+ .(expect)))))
+}
+
diff --git a/R/svykm.R b/R/svykm.R
new file mode 100644
index 0000000..22f4661
--- /dev/null
+++ b/R/svykm.R
@@ -0,0 +1,381 @@
+
+svykm<-function(formula, design, se=FALSE, ...) UseMethod("svykm",design)
+
+svykm.survey.design<-function(formula, design,se=FALSE, ...){
+ if (!inherits(formula,"formula")) stop("need a formula")
+ if (length(formula)!=3) stop("need a two-sided formula")
+ mf<-model.frame(formula, model.frame(design), na.action=na.pass)
+ mf<-na.omit(mf)
+ drop<-attr(mf,"na.action")
+ if (!is.null(drop))
+ design<-design[-drop,]
+ y<-model.response(mf)
+ if (!is.Surv(y) || attr(y,"type")!="right")
+ stop("response must be a right-censored Surv object")
+
+ if (ncol(mf)==1) {
+ if (se)
+ s<-km.stderr(y,design)
+ else
+ s<-svykm.fit(y,weights(design))
+ } else {
+ x<-mf[,-1]
+ if (NCOL(x)>1)
+ groups<-do.call(interaction,x)
+ else
+ groups<-as.factor(x)
+
+ if (se){
+ lhs<-formula
+ lhs[[3]]<-1
+ s<-lapply(levels(groups), function(g) svykm(lhs, subset(design,groups==g),se=TRUE))
+ }else{
+ s<-lapply(levels(groups), function(g) svykm.fit(y[groups==g],weights(design)[groups==g]))
+ }
+ names(s)<-levels(groups)
+ class(s)<-"svykmlist"
+ }
+ call<-match.call()
+ call[[1]]<-as.name(.Generic)
+ attr(s,"call")<-call
+ attr(s, "formula")<-formula
+ attr(s, "na.action")<-drop
+ return(s)
+}
+
+
+svykm.svyrep.design<-function(formula, design,se=FALSE, ...){
+ if (!inherits(formula,"formula")) stop("need a formula")
+ if (length(formula)!=3) stop("need a two-sided formula")
+ mf<-model.frame(formula, model.frame(design), na.action=na.pass)
+ mf<-na.omit(mf)
+ drop<-attr(mf,"na.action")
+ if (!is.null(drop))
+ design<-design[-drop,]
+ y<-model.response(mf)
+ if (!is.Surv(y) || attr(y,"type")!="right")
+ stop("response must be a right-censored Surv object")
+
+ if (ncol(mf)==1) {
+ if (se)
+ stop("SE not yet available")
+ else
+ s<-svykm.fit(y,weights(design,"sampling"))
+ } else {
+ x<-mf[,-1]
+ if (NCOL(x)>1)
+ groups<-do.call(interaction,x)
+ else
+ groups<-as.factor(x)
+
+ if (se){
+ lhs<-formula
+ lhs[[3]]<-1
+ s<-lapply(levels(groups), function(g) svykm(lhs, subset(design,groups==g),se=TRUE))
+ }else{
+ s<-lapply(levels(groups), function(g) svykm.fit(y[groups==g],weights(design,"sampling")[groups==g]))
+ }
+ names(s)<-levels(groups)
+ class(s)<-"svykmlist"
+ }
+ call<-match.call()
+ call[[1]]<-as.name(.Generic)
+ attr(s,"call")<-call
+ attr(s, "formula")<-formula
+ attr(s, "na.action")<-drop
+ return(s)
+}
+
+
+svykm.fit<-function(y,w){
+ t<-y[,"time"]
+ s<-y[,"status"]
+ nn<-rowsum(cbind(s,1)*w,t)
+ tt<-sort(unique(t))
+ N<-c(sum(w),sum(w),sum(w)-cumsum(nn[-nrow(nn),2]))
+ d<-c(0,nn[,1])
+ surv<-pmax(0,cumprod(1-d/N))
+ rval<-list(time=c(0,tt), surv=surv)
+ class(rval)<-"svykm"
+ rval
+}
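+
+## svykm.fit() is the weighted product-limit estimator: with weighted number
+## of events d_k and weighted number at risk N_k at each distinct time t_k,
+##   S(t) = prod_{t_k <= t} (1 - d_k/N_k),
+## computed from the rowsum() tabulation of weighted events and removals above.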
+
+km.stderr<-function(survobj,design){
+ time<-survobj[,'time']
+ status<-survobj[,'status']
+ ## Brute force and ignorance: compute Y and dN as totals, use delta-method
+ keep<-which((status==1) & (weights(design)!=0))
+ y<-outer(time,time[keep],">=")
+ dN<-diag(status)[,keep]
+ oo<-order(time[keep], -status[keep])
+ okeep<-keep[oo]
+ ntimes<-length(oo)
+ ttime<-time[okeep]
+ sstatus<-status[okeep]
+
+ totals<-svytotal(cbind(dN[,oo],y[,oo]), design)
+ rm(dN)
+ y<-coef(totals)[-(1:ntimes)]
+ dNbar<-coef(totals)[1:ntimes]
+
+ h<-cumsum(dNbar/y)
+
+ dVn<- vcov(totals)[(1:ntimes),(1:ntimes)]/outer(y,y)
+ dVy <- vcov(totals)[-(1:ntimes),-(1:ntimes)]*outer(dNbar/y^2,dNbar/y^2)
+ dCVny<- -vcov(totals)[(1:ntimes),-(1:ntimes)]*outer(1/y,dNbar/y^2)
+ dV<-dVn+dVy+dCVny+t(dCVny)
+
+ V<-numeric(ntimes)
+ V[1]<-dV[1,1]
+ for(i in 2:ntimes) V[i]<-V[i-1]+sum(dV[1:(i-1),i])+sum(dV[i,1:i])
+
+ rval<-list(time=ttime,surv=exp(-h),varlog=V)
+ class(rval)<-"svykm"
+ rval
+}
+
+
+plot.svykm<-function(x,xlab="time",ylab="Proportion surviving",ylim=c(0,1),ci=NULL,lty=1,...){
+ if (is.null(ci))
+ ci<-!is.null(x$varlog)
+
+ plot(x$time,x$surv,xlab=xlab,ylab=ylab, type="s",ylim=ylim,lty=lty,...)
+
+ if (ci){
+ if (is.null(x$varlog))
+ warning("No standard errors available in object")
+ else{
+ lines(x$time,exp(log(x$surv)-1.96*sqrt(x$varlog)),lty=2,type="s",...)
+ lines(x$time,pmin(1,exp(log(x$surv)+1.96*sqrt(x$varlog))),lty=2,type="s",...)
+ }
+ }
+ invisible(x)
+}
+
+
+lines.svykm<-function(x,xlab="time",type="s",ci=FALSE,lty=1,...){
+ lines(x$time,x$surv, type="s",lty=lty,...)
+ if (ci){
+ if (is.null(x$varlog))
+ warning("no standard errors available in object")
+ else {
+ lines(x$time,exp(log(x$surv)-1.96*sqrt(x$varlog)),lty=2,type="s",...)
+ lines(x$time,pmin(1,exp(log(x$surv)+1.96*sqrt(x$varlog))),lty=2,type="s",...)
+ }
+ }
+ invisible(x)
+}
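+
+## Plotting sketch (assuming 's' was fitted with se=TRUE and 's2' without):
+##   plot(s, ci=TRUE)   # step-function estimate with a pointwise 95% band,
+##                      # computed on the log-survival scale as above
+##   lines(s2, lty=2)   # overlay a second curve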
+
+plot.svykmlist<-function(x, pars=NULL, ci=FALSE,...){
+ if (!is.null(pars)) pars<-as.data.frame(pars,stringsAsFactors=FALSE)
+
+ if(is.null(pars))
+ plot(x[[1]],ci=ci,...)
+ else
+ do.call(plot,c(list(x[[1]]),pars[1,,drop=FALSE],ci=ci,...))
+
+ m<-length(x)
+ if(m==1) return()
+ for(i in 2:m){
+ if(is.null(pars))
+ lines(x[[i]],ci=ci,...)
+ else
+ do.call(lines,c(list(x[[i]]),pars[i,,drop=FALSE],ci=ci,...))
+ }
+ invisible(x)
+}
+
+print.svykm<-function(x, digits=3,...,header=TRUE){
+ if (header) {cat("Weighted survival curve: ")
+ print(attr(x,"call"))}
+ suppressWarnings({iq1<-min(which(x$surv<=0.75))
+ iq2<-min(which(x$surv<=0.5))
+ iq3<-min(which(x$surv<=0.25))})
+ if (is.finite(iq1)) q1<-x$time[iq1] else q1<-Inf
+ if (is.finite(iq2)) q2<-x$time[iq2] else q2<-Inf
+ if (is.finite(iq3)) q3<-x$time[iq3] else q3<-Inf
+ cat("Q1 =",round(q1,digits)," median =",round(q2,digits)," Q3 =",round(q3,digits),"\n")
+ invisible(x)
+}
+
+print.svykmlist<-function(x, digits=3,...){
+ cat("Weighted survival curves:\n")
+ print(attr(x,"call"))
+ for(i in 1:length(x)){
+ cat(names(x)[i],": ")
+ print(x[[i]],digits=digits,header=FALSE)
+ }
+ invisible(x)
+}
+
+quantile.svykm<-function(x, probs=c(0.75,0.5,0.25),ci=FALSE,level=0.95,...){
+
+ iq<-sapply(probs, function(p) suppressWarnings(min(which(x$surv<=p))))
+ qq<-sapply(iq, function(i) if (is.finite(i)) x$time[i] else Inf)
+ names(qq)<-probs
+ if (ci){
+ if(is.null(x$varlog)){
+ warning("no confidence interval available.")
+ } else {
+ halfalpha<-(1-level)/2
+ z<-qnorm(halfalpha, lower.tail=FALSE)
+ su<-exp(log(x$surv)+z*sqrt(x$varlog))
+ iu<-sapply(probs, function(p) suppressWarnings(min(which(su<=p))))
+ qu<-sapply(iu, function(i) if (is.finite(i)) x$time[i] else Inf)
+ sl<-exp(log(x$surv)-z*sqrt(x$varlog))
+ il<-sapply(probs, function(p) suppressWarnings(min(which(sl<=p))))
+ ql<-sapply(il, function(i) if (is.finite(i)) x$time[i] else Inf)
+ ci<-cbind(ql,qu)
+ rownames(ci)<-probs
+      colnames(ci)<-format(c(halfalpha,1-halfalpha),digits=3)
+ attr(qq,"ci")<-ci
+ }
+ }
+ qq
+}
+
+
+confint.svykm<-function(object, parm, level=0.95,...){
+ if (is.null(object$varlog)) stop("no standard errors in object")
+
+ parm<-as.numeric(parm)
+ idx<-sapply(parm, function(t) max(which(object$time<=t)))
+ z<-qnorm((1-level)/2)
+ ci<-exp(log(object$surv[idx])+outer(sqrt(object$varlog[idx]),c(z,-z)))
+ ci[,2]<-pmin(ci[,2],1)
+ rownames(ci)<-parm
+  colnames(ci)<-format(c((1-level)/2, 1-(1-level)/2),digits=3)
+ ci
+}
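+
+## Sketch: survival quartiles and pointwise intervals (again assuming 's'
+## carries standard errors, i.e. was fitted with se=TRUE):
+##   quantile(s, probs=c(0.75, 0.5, 0.25), ci=TRUE)
+##   confint(s, parm=c(365, 730), level=0.95)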
+
+
+predict.svycoxph<-function(object, newdata, se=FALSE,
+ type=c("lp", "risk", "expected", "terms","curve"),
+ ...){
+
+ type<-match.arg(type)
+ if(type!="curve") return(NextMethod())
+
+ design<-object$survey.design
+ response<-object$y
+
+ if (!is.null(attr(terms(object), "specials")$strata))
+ stop("Stratified models are not supported yet")
+
+ if (attr(response,"type")=="counting"){
+ time<-object$y[,2]
+ status<-object$y[,'status']
+ entry<-object$y[,1]
+ } else if (attr(response,'type')=="right"){
+ time<-object$y[,"time"]
+ status<-object$y[,"status"]
+ entry<-rep(-Inf,length(time))
+ } else stop("unsupported survival type")
+ if(is.null(object$na.action)){
+ design<-object$survey.design
+ } else {
+ design<-object$survey.design[-object$na.action,]
+ }
+
+ ff<-delete.response(terms(formula(object)))
+ zmf<-model.frame(ff, newdata)
+ z.pred<-model.matrix(ff, zmf)[,-1,drop=FALSE]
+
+##
+## The simple case first
+##
+ risk<-getS3method("predict","coxph")(object,type="risk",se.fit=FALSE)
+ if(se==FALSE){
+ tt<-c(time,entry)
+ ss<-c(status,rep(0,length(entry)))
+ ee<-c(rep(1,length(status)),rep(-1,length(entry)))
+ oo<-order(tt,-ee,-ss)
+ dN<-ss[oo]
+ w<-rep(weights(design),2)[oo]
+ risks<-rep(risk,2)
+ Y<-rev(cumsum(rev(risks[oo]*w*ee[oo])))
+ keep<-dN>0
+ s<-vector("list",nrow(z.pred))
+ beta<-coef(object)
+ h0<- cumsum( (w*dN/Y)[keep] )
+ for(i in 1:nrow(z.pred)){
+ zi<-z.pred[i,]-object$means
+ s[[i]]<-list(time=time[oo][keep],
+ surv=exp(-h0 * exp(sum(beta*zi))),
+ call=sys.call())
+        class(s[[i]])<-c("svykm.cox","svykm")
+ }
+ names(s)<-rownames(newdata)
+ return(s)
+ }
+##
+## The hard case: curves with standard errors
+##
+ if(!inherits(design,"survey.design"))
+ stop("replicate-weight designs not supported yet")
+
+ keep<-which((status==1) & (weights(design)!=0))
+ y<-outer(time,time[keep],">=")*risk*outer(entry,time[keep],"<=")
+ dN<-diag(status)[,keep]
+ oo<-order(time[keep], -status[keep])
+ okeep<-keep[oo]
+ ntimes<-length(oo)
+ ttime<-time[okeep]
+ sstatus<-status[okeep]
+ totals<-svytotal(cbind(dN[,oo],y[,oo]), design)
+ rm(dN)
+
+ y<-coef(totals)[-(1:ntimes)]
+ dNbar<-coef(totals)[1:ntimes]
+ vtotals<-vcov(totals)
+ rm(totals)
+
+ h<-cumsum(dNbar/y)
+
+ dVn<- vtotals[(1:ntimes),(1:ntimes)]/outer(y,y)
+ dVy <- vtotals[-(1:ntimes),-(1:ntimes)]*outer(dNbar/y^2,dNbar/y^2)
+ dCVny<- -vtotals[(1:ntimes),-(1:ntimes)]*outer(1/y,dNbar/y^2)
+ dV<-dVn+dVy+dCVny+t(dCVny)
+
+ det<-suppressWarnings(coxph.detail(object))
+ ze<-sweep(as.matrix(det$means)[rep(1:length(det$time), det$nevent),,drop=FALSE],
+ 2, object$means)
+ rm(det)
+
+ dH<-dNbar/y
+ h.ze<-dH*ze
+ varbeta<-vcov(object)
+
+ Vh<-numeric(ntimes)
+ Vh[1]<-dV[1,1]
+ for(i in 2:ntimes) Vh[i]<-Vh[i-1]+sum(dV[1:(i-1),i])+sum(dV[i,1:i])
+ dVb<-numeric(ntimes)
+ for(i in 1:ntimes) dVb[i]<-crossprod(h.ze[i,],varbeta%*%(h.ze[i,]))
+ Vb<-cumsum(dVb)
+ dCV<-matrix(nrow=ntimes,ncol=NCOL(ze))
+ for(i in 1:ntimes) dCV[i,] <- -varbeta%*%(h.ze[i,])
+ CV<-apply(dCV,2,cumsum)
+
+ V0<-Vh+Vb
+ s0<-exp(-h)
+
+ s<-vector("list",nrow(z.pred))
+ for(i in 1:nrow(z.pred)){
+ zi<-z.pred[i,]-object$means
+ riski<-exp(sum(zi*coef(object)))
+ Vz<-drop(crossprod(zi,varbeta%*%zi))*riski^2*h^2
+ CVz<-colSums(t(dCV)*zi)*riski^2*h
+
+ V<-V0*riski^2+Vz+CVz*2
+ s[[i]]<-list(time=ttime,surv=exp(-h*riski), varlog=V)
+ class(s[[i]])<-c("svykm.cox","svykm")
+ }
+ names(s)<-rownames(newdata)
+ scall<-sys.call()
+ scall[[1]]<-as.name(.Generic)
+ attr(s,"call")<-scall
+ class(s)<-c("svykmlist.cox","svykmlist")
+ return(s)
+}
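+
+## Sketch: predicted survival curves from a design-weighted Cox model
+## (hypothetical design 'dpbc' as above; bili is a pbc covariate):
+##   fit <- svycoxph(Surv(time, status>0)~log(bili), design=dpbc)
+##   cv <- predict(fit, newdata=data.frame(bili=c(1,5)), type="curve", se=TRUE)
+##   plot(cv[[1]], ci=TRUE)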
+
+
diff --git a/R/svymi.R b/R/svymi.R
new file mode 100644
index 0000000..d6b0f5a
--- /dev/null
+++ b/R/svymi.R
@@ -0,0 +1,249 @@
+
+svydesign.imputationList<-function(ids, probs = NULL, strata = NULL,
+ variables = NULL, fpc = NULL, data, nest = FALSE,
+ check.strata = !nest, weights = NULL, pps=FALSE,...){
+ designs <- lapply(data$imputations, function(d) svydesign(ids=ids, probs=probs,
+ strata=strata,variables=variables,fpc=fpc,nest=nest,
+ check.strata=check.strata, weights=weights,data=d,pps=pps,...))
+ rval <- list(designs=designs, call=sys.call(-1))
+ class(rval) <- "svyimputationList"
+ rval
+ }
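+
+## Sketch (assuming 'imps', an imputationList from the mitools package whose
+## component data frames carry id, strata and weight columns):
+##   library(mitools)
+##   designs <- svydesign(ids=~id, strata=~strata, weights=~weight,
+##                        data=imps, nest=TRUE)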
+
+svrepdesign.imputationList<-function(variables=NULL, repweights,weights,data,mse=getOption("survey.replicates.mse"),...){
+ ## dispatch on data=
+ if (!is.null(variables) && !inherits(variables,"imputationList"))
+ stop("'variables' must also be an 'imputationList' (or NULL)")
+
+
+ if(!is.null(variables)){
+ if (inherits(repweights,"imputationList")){
+ designs <- mapply(function(v,d,r) svrepdesign(variables=v, repweights=r, weights=weights,data=NULL,mse=mse,...),
+ variables$imputations,data$imputations, repweights$imputations,SIMPLIFY=FALSE)
+ } else {
+ designs <- mapply(function(d,v) svrepdesign(variables=v, repweights=repweights, weights=weights,data=d,mse=mse,...),
+ data$imputations,variables$imputations,SIMPLIFY=FALSE)
+ }
+ }else{
+ if (inherits(repweights,"imputationList")){
+      designs <- mapply(function(d,r) svrepdesign(repweights=r, weights=weights,data=d,mse=mse,...),
+ data$imputations, repweights$imputations,SIMPLIFY=FALSE)
+ } else {
+ designs <- lapply(data$imputations, function(d) svrepdesign( repweights=repweights, weights=weights,data=d,mse=mse,...))
+ }
+ }
+ rval <- list(designs=designs, call=sys.call(-1))
+ class(rval) <- "svyimputationList"
+ rval
+}
+
+svydesign.DBimputationList<-function(ids, probs = NULL, strata = NULL,
+ variables = NULL, fpc = NULL, data, nest = FALSE,
+ check.strata = !nest, weights = NULL, ...){
+
+ design.vars<-c(all.vars(ids), all.vars(probs), all.vars(strata),all.vars(fpc), all.vars(weights))
+ design.query<-paste("select", paste(design.vars,collapse=","), "from", data$imputations[1])
+ if (data$db$dbtype=="ODBC")
+ design.data<-RODBC::sqlQuery(data$db$connection, design.query)
+ else
+ design.data<-DBI::dbGetQuery(data$db$connection, design.query)
+
+ rval<-list()
+ rval$design<-svydesign(ids=ids, probs=probs, strata=strata, data=design.data,
+ fpc=fpc, variables=variables, nest=nest,check.strata=check.strata,
+ weights=weights)
+ class(rval$design)<-c(if(data$db$dbtype=="ODBC") "ODBCsvydesign" else "DBIsvydesign", class(rval$design))
+
+ rval$design$updates<-data$updates
+ rval$db<-data$db
+ rval$imputations<-data$imputations
+ rval$variables<-NULL
+ rval$call<-sys.call(-1)
+ class(rval)<-"svyDBimputationList"
+ rval
+}
+
+print.svyDBimputationList<-function(x,...){
+ cat("DB-backed Multiple (",length(x$imputations),") imputations: ",sep="")
+ print(x$call)
+}
+
+print.svyimputationList<-function(x,...){
+ cat("Multiple (",length(x$designs),") imputations: ",sep="")
+ print(x$call)
+}
+
+dim.svyimputationList<-function(x){
+ c(dim(x$designs[[1]]),length(x$designs))
+}
+
+dimnames.svyimputationList<-function(x){
+ c(dimnames(x$designs[[1]]),list(paste("imputation",1:length(x$designs))))
+}
+
+subset.svyimputationList<-function(x, subset,...){
+ n<-nrow(x$designs[[1]])
+ e<-substitute(subset)
+ r<-eval(e,x$designs[[1]]$variables, parent.frame())
+ r <- r & !is.na( r )
+ x$designs[[1]]<-x$designs[[1]][r,]
+ same<-TRUE
+ for(i in 2:length(x$designs)){
+    r1<-eval(e,x$designs[[i]]$variables, parent.frame())
+    r1<-r1 & !is.na(r1)
+    x$designs[[i]]<-x$designs[[i]][r1,]
+ if (any(r!=r1)) {
+ same<-FALSE
+ }
+ }
+ if (!same) warning('subset differed between imputations')
+
+ x$call<-sys.call(-1)
+ x
+ }
+
+subset.svyDBimputationList<-function(x, subset,...,all=FALSE){
+  n<-nrow(x$design)
+ e<-substitute(subset)
+ df<-getvars(all.vars(e), x$db$connection, x$imputations[1],
+ db.only=FALSE, updates=x$design$updates)
+  r<-eval(e,df, parent.frame())
+  r<-r & !is.na(r)
+ same<-TRUE
+ for(i in 2:length(x$imputations)){
+ df<-getvars(all.vars(e), x$db$connection, x$imputations[i],
+ db.only=FALSE, updates=x$design$updates)
+
+ r1<-eval(e,df, parent.frame())
+ r1<-r1 & !is.na(r1)
+ if (any(r!=r1)) {
+ same<-FALSE
+ if (all) r <- r & r1 else r<- r | r1
+ }
+ }
+ if (!same) warning('subset differed between imputations')
+ x$design<-x$design[r,]
+ x$call<-sys.call(-1)
+ x
+ }
+
+with.svyimputationList<-function (data, expr, fun, ..., multicore=getOption("survey.multicore")) {
+ pf <- parent.frame()
+ if (multicore && !requireNamespace("parallel",quietly=TRUE))
+ multicore<-FALSE
+
+ if (!is.null(match.call()$expr)) {
+ expr <- substitute(expr)
+ expr$design<-as.name(".design")
+ if (multicore){
+ results <- parallel::mclapply(data$designs,
+ function(.design) {
+ eval(expr, list(.design=.design),enclos=pf)
+ }
+ )
+ } else{
+ results <- lapply(data$designs,
+ function(.design) {
+ eval(expr, list(.design=.design),enclos=pf)
+ }
+ )
+
+ }
+ }
+ else {
+ results <- lapply(data$designs, fun, ...)
+ }
+ if (all(sapply(results, inherits, what = "imputationResult"))) {
+ class(results) <- "imputationResultList"
+ results$call <- sys.call(-1)
+ }
+ else {
+ attr(results, "call") <- sys.call(-1)
+ }
+ results
+ }
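+
+## Sketch: analyse each imputed design and pool with Rubin's rules
+## ('designs' as in the sketch above; x is a hypothetical analysis variable):
+##   res <- with(designs, svymean(~x))
+##   summary(mitools::MIcombine(res))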
+
+
+with.svyDBimputationList<-function (data, expr, ..., multicore=getOption("survey.multicore")) {
+ pf <- parent.frame()
+ if (!is.null(match.call()$expr)) {
+ expr <- substitute(expr)
+ expr$design<-as.name(".design")
+    if (multicore && !requireNamespace("parallel",quietly=TRUE)) multicore<-FALSE
+ if (multicore){
+ results<-parallel::mclapply(data$imputations,
+ function(tablename) {
+ close(data)
+ .design<-data$design
+ db<-data$db
+ db$tablename<-tablename
+ .design$db<-db
+ .design<-open(.design)
+ rval<-eval(expr, list(.design=.design),enclos=pf)
+ close(.design)
+ rval
+ }
+ )
+ } else {
+ results <- lapply(data$imputations,
+ function(tablename) {
+ .design<-data$design
+ db<-data$db
+ db$tablename<-tablename
+ .design$db<-db
+ eval(expr, list(.design=.design),enclos=pf)
+ }
+ )
+ }
+ }
+ attr(results, "call") <- sys.call(-1)
+ results
+ }
+
+
+update.svyDBimputationList<-function(object, ...){
+ dots <- substitute(list(...))[-1]
+ newnames <- names(dots)
+
+ updates<-lapply(dots, function(dot){
+ list(inputs=all.vars(dot),expression=dot)
+ })
+
+ if (is.null(object$design$updates))
+ object$design$updates<-list(updates)
+ else
+ object$design$updates<-c(object$design$updates, list(updates))
+ object
+}
+
+update.svyimputationList<-function(object, ...){
+ dots <- substitute(list(...))[-1]
+ newnames <- names(dots)
+ for (i in seq(along = object$designs)) {
+ for (j in seq(along = dots)) {
+ object$designs[[i]]$variables[, newnames[j]] <- eval(dots[[j]],
+ object$designs[[i]]$variables, parent.frame())
+ }
+ }
+ object
+}
+
+close.svyDBimputationList<-function(con,...){
+ dbcon<-con$db$connection
+ if (is(dbcon,"DBIConnection"))
+ DBI::dbDisconnect(dbcon)
+ else
+ RODBC::odbcClose(dbcon)
+ invisible(con)
+}
+
+open.svyDBimputationList<-function(con,...){
+ if(con$db$dbtype=="ODBC"){
+ oldenc<-attr(con$db$connection)
+ con$db$connection<-RODBC::odbcReConnect(con$db$connection,...)
+ attr(con$db$connection,"encoding")<-oldenc
+ } else {
+ dbdriver<-DBI::dbDriver(con$db$dbtype)
+ con$db$connection<-DBI::dbConnect(dbdriver,dbname=con$db$dbname,...)
+ }
+ con
+}
diff --git a/R/svypredmeans.R b/R/svypredmeans.R
new file mode 100644
index 0000000..a42108c
--- /dev/null
+++ b/R/svypredmeans.R
@@ -0,0 +1,30 @@
+
+svypredmeans<-function(adjustmodel, groupfactor){
+
+ design<-eval(bquote(update(adjustmodel$survey.design, .groupfactor=.(groupfactor[[2]]))))
+ groups<-unique(model.frame(design)$.groupfactor)
+ groups<-groups[!is.na(groups)]
+ model<-update(adjustmodel, .~.+.groupfactor,design=design)
+ w<-weights(design,"sampling")
+
+ fits<-matrix(nrow=NROW(design),ncol=length(groups))
+ dg_deta<-matrix(nrow=length(coef(model)),ncol=length(groups))
+ for(i in 1:length(groups)){
+ mf<-model.frame(design)
+ mf$.groupfactor<-groups[i]
+ mu<-predict(model,newdata=mf,type="response",se.fit=FALSE)
+ eta<-predict(model,newdata=mf,type="link",se.fit=FALSE)
+ fits[,i]<-coef(mu)
+
+ mm<-model.matrix(terms(model),mf)
+ dg_deta[,i]<-t(colSums(w*model$family$mu.eta(eta)*mm))/sum(w)
+ }
+ colnames(fits)<-as.character(groups)
+ cond<-svymean(fits,design)
+ addvar<-t(dg_deta)%*%vcov(model)%*%dg_deta
+ vv<-addvar+attr(cond,"var")
+ attr(vv,"parts")<-list(addvar,attr(cond,"var"))
+ attr(cond,"var")<-vv
+ cond
+}
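+
+## Sketch: predictive marginal means for a grouping factor, adjusting for the
+## covariates in a working svyglm model (api data; variables illustrative):
+##   model <- svyglm(api00~ell+mobility, design=dclus1)
+##   svypredmeans(model, ~stype)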
+
diff --git a/R/svyranktest.R b/R/svyranktest.R
new file mode 100644
index 0000000..dc0b0e0
--- /dev/null
+++ b/R/svyranktest.R
@@ -0,0 +1,107 @@
+
+
+svyranktest<-function(formula,design,test=c('wilcoxon','vanderWaerden','median',"KruskalWallis"),...){
+ UseMethod("svyranktest", design)
+}
+
+svyranktest.survey.design<-svyranktest.svyrep.design<-function(formula, design, test=c('wilcoxon','vanderWaerden','median',"KruskalWallis"),...)
+{
+ mf<-model.frame(formula,model.frame(design),na.action=na.omit)
+ if (!is.null(naa<-attr(mf,"na.action"))){
+ design<-design[-naa,]
+ mf<-model.frame(formula,model.frame(design),na.action=na.fail)
+ }
+ y<-mf[,1]
+ g<-mf[,2]
+
+ if (length(unique(g))!=2) {
+ return(multiranktest(formula,design, test,...))
+ }
+
+ if (is.character(test)) {
+ test<-match.arg(test)
+ testf<-switch(test, wilcoxon=,KruskalWallis=function(r,N) r/N,
+ vanderWaerden=function(r,N) qnorm(r/N),
+ median=function(r,N) as.numeric(r>N/2))
+ } else{
+ testf<-test
+ }
+ if (identical(test,"wilcoxon")) test<-"KruskalWallis"
+
+ ii<-order(y)
+ n<-length(y)
+ rankhat<-numeric(n)
+ w<-weights(design,"sampling")
+
+ N<-sum(w)
+ rankhat[ii]<-ave(cumsum(w[ii])-w[ii]/2,factor(y[ii]))
+ rankscore<-testf(rankhat,N)
+ m <- lm(rankscore~g, weights=w)
+ delta<-coef(m)[2]
+ xmat<-model.matrix(m)
+ infn<- (xmat*(rankscore-fitted(m)))%*%summary(m)$cov.unscaled
+ tot.infn<-svytotal(infn,design)
+ if (is.character(test))
+ method<-paste("Design-based",test,"test")
+ else if (!is.null(attr(test,"name")))
+ method<-paste("Design-based",attr(test,"name"),"test")
+ else method<-"Design-based rank test"
+
+ rval <- list(statistic = coef(m)[2]/SE(tot.infn)[2], parameter = degf(design) -
+ 1, estimate = coef(m)[2], null.value = 0, alternative = "two.sided",
+ method = method, data.name = deparse(formula))
+ rval$p.value <- 2 * pt(-abs(rval$statistic), df = rval$parameter)
+ names(rval$statistic) <- "t"
+ names(rval$parameter) <- "df"
+ names(rval$estimate) <- "difference in mean rank score"
+ names(rval$null.value) <- "difference in mean rank score"
+ class(rval) <- "htest"
+ rval
+}
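+
+## Sketch: design-based two-group rank tests (api data; variables illustrative):
+##   svyranktest(ell~comp.imp, dclus1)                 # Wilcoxon (default)
+##   svyranktest(ell~comp.imp, dclus1, test="median")  # median test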
+
+multiranktest<-function(formula,design,test=c('wilcoxon','vanderWaerden','median','KruskalWallis'),...){
+ mf<-model.frame(formula,model.frame(design),na.action=na.omit)
+ if (!is.null(naa<-attr(mf,"na.action"))){
+ design<-design[-naa,]
+ mf<-model.frame(formula,model.frame(design),na.action=na.fail)
+ }
+ y<-mf[,1]
+ g<-mf[,2]
+
+ if (is.character(test)) {
+ test<-match.arg(test)
+ testf<-switch(test, wilcoxon=,KruskalWallis=function(r,N) r/N,
+ vanderWaerden=function(r,N) qnorm(r/N),
+ median=function(r,N) as.numeric(r>N/2))
+ } else{
+ testf<-test
+ }
+ if (identical(test,"wilcoxon")) test<-"KruskalWallis"
+
+ ii<-order(y)
+ n<-length(y)
+ rankhat<-numeric(n)
+ w<-weights(design,"sampling")
+
+ N<-sum(w)
+ rankhat[ii]<-ave(cumsum(w[ii])-w[ii]/2,factor(y[ii]))
+ rankscore<-testf(rankhat,N)
+ m <- glm(rankscore~factor(g),weights=w)
+ V<-svy.varcoef(m,design)
+ ndf<-length(unique(g))-1
+ beta<-coef(m)[-1]
+ V<-V[-1,-1]
+ chisq<-beta%*%solve(V,beta)
+ ddf<-degf(design)-ndf
+ if (is.character(test))
+ method<-paste("Design-based",test,"test")
+ else if (!is.null(attr(test,"name")))
+ method<-paste("Design-based",attr(test,"name"),"test")
+ else method<-"Design-based rank test"
+ names(chisq)<-"Chisq"
+ names(ndf)<-"df"
+  rval<-list(statistic=chisq,parameter=ndf,ddf=ddf,p.value=pf(chisq/ndf,ndf,ddf,lower.tail=FALSE),
+ method=method, data.name = deparse(formula))
+ class(rval)<-"htest"
+ rval
+}
diff --git a/R/svysmooth.R b/R/svysmooth.R
new file mode 100644
index 0000000..c130027
--- /dev/null
+++ b/R/svysmooth.R
@@ -0,0 +1,143 @@
+svysmooth<-function(formula,design,...) UseMethod("svysmooth", design)
+svysmooth.default<-function(formula, design,method=c("locpoly","quantreg"),bandwidth=NULL,quantile,df=4,...){
+ switch(match.arg(method),
+ locpoly=svylocpoly(formula,design,bandwidth=bandwidth,...),
+ quantreg=svyrqss(formula,design,quantile=quantile,df=df,...)
+ )
+}
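+
+## Sketch (api data): a design-weighted scatterplot smoother and a weighted
+## density estimate; the bandwidths are illustrative:
+##   dstrat <- svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+##   smth <- svysmooth(api00~api99, dstrat, bandwidth=40)
+##   dens <- svysmooth(~api99, dstrat, bandwidth=50)
+##   plot(smth); plot(dens)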
+
+fitted.rq<-function(object,...) object$x%*% object$coefficients/object$weights
+
+svyrqss<-function(formula,design,quantile=0.5,df=4,...){
+ mf<-model.frame(formula, model.frame(design), na.action=na.omit)
+ naa<-attr(mf,"na.action")
+
+ tt<-attr(terms(formula),"term.labels")
+ df<-rep(df, length=length(tt))
+ quantile<-rep(quantile, length=length(tt))
+
+ if (length(formula)==3){
+ density<-FALSE
+ } else {
+ density<-TRUE
+    stop("method='quantreg' does not do densities")
+ }
+
+ w<-weights(design,type="sampling")
+ if (length(naa)) w<-w[-naa]
+ environment(formula)<-environment()
+
+ ll<-vector("list", length(tt))
+ for(i in 1:length(tt)){
+ termi<-as.name(tt[i])
+ ff<-eval(bquote(update(formula,.~splines::bs(.(termi),df=.(df[i])))))
+ rqfit<-quantreg::rq(ff, tau=quantile[i],weights=w,data=mf,...)
+ xx<-mf[,i+1]
+ oo<-order(xx)
+ ll[[i]]<-list(x=xx[oo],y=fitted.rq(rqfit)[oo])
+ }
+ names(ll)<-attr(terms(formula),"term.labels")
+
+ attr(ll,"call")<-sys.call(-2)
+ attr(ll,"density")<-density
+ if(density)
+ attr(ll,"ylab")<-"Density"
+ else
+ attr(ll,"ylab")<-deparse(formula[[2]])
+
+ class(ll)<-"svysmooth"
+
+ ll
+
+}
+
+svylocpoly<-function(formula, design, ngrid=401, xlim=NULL,
+ ylim=NULL, bandwidth=NULL,...){
+
+ mf<-model.frame(formula,model.frame(design))
+ mm<-model.matrix(terms(formula),mf)
+ if(attr(terms(formula),"intercept"))
+ mm<-mm[,-1,drop=FALSE]
+
+ naa<-attr(mf,"na.action")
+
+
+ if (length(formula)==3){
+ Y<-model.response(mf)
+ density<-FALSE
+ } else density<-TRUE
+
+
+ if (is.null(xlim)){
+ xlim<-apply(mm,2,range)
+ }
+ if (!is.matrix(xlim))
+ xlim<-matrix(xlim,nrow=2)
+
+
+ if (is.null(bandwidth)){
+ bandwidth<-numeric(ncol(mm))
+ for(i in 1:ncol(mm)){
+ bandwidth[i]<-if(density) KernSmooth::dpik(mm[,i],gridsize=ngrid) else KernSmooth::dpill(mm[,i],Y,gridsize=ngrid)
+ }
+ } else {
+ bandwidth<-rep(bandwidth, length=ncol(mm))
+ }
+
+ w<-weights(design,type="sampling")
+ if (length(naa)) w<-w[-naa]
+
+ ll<-vector("list", ncol(mm))
+ for(i in 1:NCOL(mm)){
+ gx<-seq(min(xlim[,i]), max(xlim[,i]), length=ngrid)
+ nx<-rowsum(c(rep(0,ngrid),w), c(1:ngrid, findInterval(mm[,i],gx)))
+ if (density){
+ ll[[i]]<-KernSmooth::locpoly(rep(1,ngrid),nx*ngrid/(diff(xlim[,i])*sum(w)),
+ binned=TRUE, bandwidth=bandwidth[i], range.x=xlim[,i])
+ }else{
+ ny<-rowsum(c(rep(0,ngrid), Y*w), c(1:ngrid, findInterval(mm[,i],gx)))
+ ll[[i]]<-KernSmooth::locpoly(nx, ny, binned=TRUE, bandwidth=bandwidth[i], range.x=xlim[,i])
+ }
+  }
+  names(ll)<-attr(terms(formula),"term.labels")
+ attr(ll,"call")<-sys.call(-2)
+ attr(ll,"density")<-density
+ if(density)
+ attr(ll,"ylab")<-"Density"
+ else
+ attr(ll,"ylab")<-deparse(formula[[2]])
+
+ class(ll)<-"svysmooth"
+
+ ll
+
+}
+
+print.svysmooth<-function(x,...){
+ if(attr(x,"density"))
+    cat("Density estimate :")
+ else
+ cat("Scatterplot smoother :")
+ print(attr(x,"call"))
+ invisible(x)
+}
+
+
+
+plot.svysmooth<-function(x, which=NULL,type="l",xlabs=NULL,ylab=NULL,...){
+ if (is.null(which))
+ which<-seq(length=length(x))
+ if (is.character(which))
+ which<-match(which,names(x))
+
+ if(is.null(xlabs)) xlabs<-names(x)[which]
+ if(is.null(ylab)) ylab<-attr(x,"ylab")
+
+ for(i in seq(length=length(which)))
+ plot(x[[which[i]]], type=type, xlab=xlabs[i], ylab=ylab, ...)
+ invisible(NULL)
+}
+
+lines.svysmooth<-function(x,which=NULL,...){
+    if (is.null(which)) which<-names(x)
+    if (is.numeric(which)) which<-names(x)[which]
+    for(i in which) lines(x[[i]],...)
+}
diff --git a/R/svyttest.R b/R/svyttest.R
new file mode 100644
index 0000000..e28f713
--- /dev/null
+++ b/R/svyttest.R
@@ -0,0 +1,105 @@
+svyttest<-function(formula, design,...) UseMethod("svyttest",design)
+
+svyttest.default<-function(formula, design, ...){
+ if (formula[[3]]==1 || formula[[3]]==0){
+ ## one-sample
+ tt <- eval(bquote(svymean(~.(formula[[2]]),design)))
+ rval<-list(statistic=coef(tt)[1]/SE(tt)[1],
+ parameter=degf(design),
+ estimate=coef(tt)[1],
+ null.value=0,
+ alternative="two.sided",
+ method="Design-based one-sample t-test",
+ data.name=deparse(formula))
+ rval$p.value<-2*pt(-abs(rval$statistic),df=rval$parameter)
+ names(rval$statistic)<-"t"
+ names(rval$parameter)<-"df"
+ names(rval$estimate)<-"mean"
+ names(rval$null.value)<-"mean"
+ class(rval)<-"htest"
+ } else {
+ ## two-sample
+ m <- eval(bquote(svyglm(formula,design, family=gaussian())))
+ rval<-list(statistic=coef(m)[2]/SE(m)[2],
+ parameter=degf(design)-1,
+ estimate=coef(m)[2],
+ null.value=0,
+ alternative="two.sided",
+ method="Design-based t-test",
+ data.name=deparse(formula))
+ rval$p.value<-2*pt(-abs(rval$statistic),df=rval$parameter)
+ names(rval$statistic)<-"t"
+ names(rval$parameter)<-"df"
+ names(rval$estimate)<-"difference in mean"
+ names(rval$null.value)<-"difference in mean"
+ class(rval)<-"htest"
+ }
+ return(rval)
+
+}
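+
+## Sketch (api data): two-sample and one-sample design-based t-tests:
+##   svyttest(enroll~comp.imp, dclus1)
+##   svyttest(I(api00-api99)~0, dclus1)   # H0: mean change is zero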
+
+expit<-function(eta) exp(eta)/(1+exp(eta))
+
+svyciprop<-function(formula, design, method=c("logit","likelihood","asin","beta","mean","xlogit"),
+ level=0.95,df=degf(design),...) {
+ method<-match.arg(method)
+ if (method=="mean"){
+ m<-eval(bquote(svymean(~as.numeric(.(formula[[2]])),design,...)))
+ ci<-as.vector(confint(m,1,level=level,df=df,...))
+ rval<-coef(m)[1]
+ attr(rval,"var")<-vcov(m)
+ } else if (method=="asin"){
+ m<-eval(bquote(svymean(~as.numeric(.(formula[[2]])),design,...)))
+ names(m)<-1
+ xform<-svycontrast(m,quote(asin(sqrt(`1`))))
+ ci<-sin(as.vector(confint(xform,1,level=level,df=df,...)))^2
+ rval<-coef(m)[1]
+ attr(rval,"var")<-vcov(m)
+ } else if (method=="xlogit"){
+ m<-eval(bquote(svymean(~as.numeric(.(formula[[2]])),design,...)))
+ names(m)<-1
+ xform<-svycontrast(m,quote(log(`1`/(1-`1`))))
+ ci<-expit(as.vector(confint(xform,1,level=level,df=df,...)))
+ rval<-coef(m)[1]
+ attr(rval,"var")<-vcov(m)
+ } else if (method=="beta"){
+ m<-eval(bquote(svymean(~as.numeric(.(formula[[2]])),design,...)))
+ n.eff <- coef(m)*(1-coef(m))/vcov(m)
+ rval<-coef(m)[1]
+ attr(rval,"var")<-vcov(m)
+ alpha<-1-level
+ n.eff<-n.eff*( qt(alpha/2, nrow(design)-1)/qt(alpha/2, degf(design)) )^2
+ ci<-c(qbeta(alpha/2, n.eff*rval,n.eff*(1-rval)+1),
+ qbeta(1-alpha/2, n.eff*rval+1, n.eff*(1-rval)))
+ } else {
+ m<-eval(bquote(svyglm(.(formula[[2]])~1,design, family=quasibinomial)))
+ cimethod<-switch(method, logit="Wald",likelihood="likelihood")
+ ci<-suppressMessages(as.numeric(expit(confint(m,1,level=level,method=cimethod,ddf=df))))
+ rval<-expit(coef(m))[1]
+ attr(rval,"var")<-vcov(eval(bquote(svymean(~as.numeric(.(formula[[2]])),design,...))))
+ }
+ halfalpha<-(1-level)/2
+ names(ci)<-paste(round(c(halfalpha,(1-halfalpha))*100,1),"%",sep="")
+ names(rval)<-deparse(formula[[2]])
+ attr(rval,"ci")<-ci
+ class(rval)<-"svyciprop"
+ rval
+}
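+
+## Sketch: intervals for a proportion near the boundary (api data):
+##   svyciprop(~I(ell==0), dclus1, method="likelihood")
+##   svyciprop(~I(ell==0), dclus1, method="beta")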
+
+confint.svyciprop<-function(object,parm,level=NULL,...){
+ if (!is.null(level)) stop("need to re-run svyciprop to specify level")
+ rval<-t(as.matrix(attr(object,"ci")))
+ rownames(rval)<-names(object)
+ rval
+}
+
+coef.svyciprop<-function(object,...) object
+
+vcov.svyciprop<-function(object,...) attr(object,"var")
+
+print.svyciprop<-function(x,digits=max(3,getOption("digits")-4),...){
+ m <- cbind(coef(x), confint(x))
+ printCoefmat(m,digits=digits)
+ invisible(x)
+}
+
diff --git a/R/sysdata.rda b/R/sysdata.rda
new file mode 100644
index 0000000..f987d23
Binary files /dev/null and b/R/sysdata.rda differ
diff --git a/R/transform.R b/R/transform.R
new file mode 100644
index 0000000..cd80d70
--- /dev/null
+++ b/R/transform.R
@@ -0,0 +1,9 @@
+## another name for update()
+transform.survey.design<-function(`_data`, ...) update(`_data`,...)
+transform.svyrep.design<-function(`_data`, ...) update(`_data`,...)
+transform.twophase<-function(`_data`, ...) update(`_data`,...)
+transform.twophase2<-function(`_data`, ...) update(`_data`,...)
+transform.ODBCsvydesign<-function(`_data`, ...) update(`_data`,...)
+transform.DBIsvydesign<-function(`_data`, ...) update(`_data`,...)
+transform.svyimputationList<-function(`_data`, ...) update(`_data`,...)
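+
+## Sketch: add a derived variable to an existing design object
+## (assuming 'dstrat', a svydesign object over the api data):
+##   dstrat <- transform(dstrat, apidiff = api00 - api99)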
+
diff --git a/R/twophase.R b/R/twophase.R
new file mode 100644
index 0000000..3dd40a8
--- /dev/null
+++ b/R/twophase.R
@@ -0,0 +1,772 @@
+##
+##
+twophase<-function(id,strata=NULL, probs=NULL, weights=NULL, fpc=NULL,
+ subset, data, method=c("full","approx","simple")){
+ method<-match.arg(method)
+ if(method=="full") {
+ if (!is.null(weights)) stop("weights not accepted by method='full'")
+ return(twophase2(id=id, strata=strata, probs=probs, fpc=fpc,subset=subset,data=data))
+ }
+
+ d1<-svydesign(ids=id[[1]],strata=strata[[1]],weights=weights[[1]],
+ probs=probs[[1]],fpc=fpc[[1]],data=data)
+
+ if(inherits(subset,"formula"))
+ subset<-eval.parent(model.frame(subset,data=data,na.action=na.pass))[[1]]
+
+  if(!is.logical(subset) && all(subset %in% c(0,1)))
+ subset<-as.logical(subset)
+
+ if (any(is.na(subset))) stop("missing values in 'subset'")
+
+ d1s<-svydesign(ids=id[[1]],strata=strata[[1]],weights=weights[[1]],
+ probs=probs[[1]],fpc=fpc[[1]],data=data[subset,])
+ d1s$prob<-d1$prob[subset]
+ d1s$allprob<-d1$allprob[subset,,drop=FALSE]
+
+ ##if (NCOL(d1s$allprob)>1)
+ ## stop("Can't handle multistage sampling at phase 1 (yet)")
+
+ ## work out phase-two fpc
+ if (is.null(fpc[[2]])){
+ complete.vars<-names(data)[apply(data, 2, function(v) all(!is.na(v)))]
+ if (all(c(all.vars(id[[2]]), all.vars(strata[[2]])) %in% complete.vars)){
+ dfpc<-svydesign(ids=id[[2]], strata=strata[[2]], data=data, probs=NULL)
+ popsize<-mapply(function(s,i) ave(!duplicated(i),s,FUN=sum), dfpc$strata, dfpc$cluster)
+ rm(dfpc)
+ } else {
+ warning("Second-stage fpc not specified and not computable")
+ popsize<-NULL
+ }
+ } else popsize<-NULL
+
+ d2<-svydesign(ids=id[[2]], strata=strata[[2]], probs=probs[[2]],
+ weights=weights[[2]], fpc=fpc[[2]], data=data[subset,])
+
+ ## ugly hack to get nicer labels
+ if(!is.null(fpc[[2]])){
+ d2call<-bquote(svydesign(ids=.(id[[2]]),strata=.(strata[[2]]), probs=.(probs[[2]]),
+ weights=.(weights[[2]]), fpc=.(fpc[[2]])))
+ } else{
+ d2call<-bquote(svydesign(ids=.(id[[2]]),strata=.(strata[[2]]), probs=.(probs[[2]]),
+ weights=.(weights[[2]]), fpc=`*phase1*`))
+ }
+ for(i in names(d2call)[-1])
+ d2call[[i]]<-d2call[[i]]
+ d2$call<-d2call
+ d1call<-bquote(svydesign(ids=.(id[[1]]), strata=.(strata[[1]]), probs=.(probs[[1]]),
+ weights=.(weights[[1]]), fpc=.(fpc[[1]])))
+ for(i in names(d1call)[-1])
+ d1call[[i]]<-d1call[[i]]
+ d1$call<-d1call
+
+
+ ## Add phase 2 fpc and probs if they were computed rather than specified.
+ if (!is.null(popsize))
+ d2$fpc<-as.fpc(popsize[subset,,drop=FALSE],d2$strata,d2$cluster)
+ if(is.null(probs[[2]]) && is.null(weights[[2]]) && !is.null(d2$fpc$popsize)){
+ d2$allprob<-1/weights(d2$fpc,final=FALSE)
+ d2$prob<-apply(as.data.frame(d2$allprob),1,prod)
+ }
+
+ d2$variables<-NULL
+ rval<-list(phase1=list(full=d1,sample=d1s),
+ phase2=d2,
+ subset=subset)
+ rval$prob<-rval$phase1$sample$prob
+
+ ## Are phase 2 PSUs the same as Phase 1 USUs, or smaller?
+ rval$samescale<- !any(duplicated(d1s$cluster[,NCOL(d1s$cluster)][!duplicated(d2$cluster[,1])]))
+
+ ## For each phase 1 sampling unit, need probability of being represented
+ ## at phase 2.
+ nunique<-function(x) sum(!duplicated(x))
+ m<-NCOL(rval$phase1$sample$cluster)
+ if(d2$has.strata){
+ if (inherits(strata[[2]],"formula"))
+ sa<-eval(attr(terms(strata[[2]]),"variables")[[2]],d1$variables)
+ else
+ sa<-d1$strata[,1]
+ cm<-rval$phase1$full$cluster[,m]
+ if (nunique(sa)!=nunique(sa[subset]))
+ stop("Some phase-2 strata have zero sampling fraction")
+ rval$usu<-ave(cm[subset],sa[subset],FUN=nunique)/ave(cm,sa,FUN=nunique)[subset]
+ } else {
+ rval$usu<-drop(with(rval$phase1$sample,ave(cluster[,m], strata[,m], FUN=nunique))/rval$phase1$full$fpc$sampsize[rval$subset])
+ }
+
+## if (any(rval$usu<1) && any(duplicated(d1$cluster[,1])))
+## stop("Phase 1 design must either be element sampling or have all phase 1 sampling units in phase 2")
+
+ if (length(rval$phase1$sample$prob)==length(d2$prob))
+ rval$prob<-rval$phase1$sample$prob*d2$prob
+ else{
+ rval$prob<-rep(Inf,length(rval$phase1$sample$prob))
+ rval$prob[subset]<-rval$prob[subset]*d2$prob
+ }
+ rval$call<-sys.call()
+ class(rval) <- c("twophase","survey.design")
+ rval
+}
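+
+## Sketch: a case-cohort study as a two-phase design (nwtco from the survival
+## package; 'in.subcohort' is a subcohort indicator added beforehand, as in
+## the package vignettes):
+##   library(survival)
+##   data(nwtco)
+##   dcchs <- twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
+##                     subset=~I(in.subcohort | rel), data=nwtco)
+##   svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+##            design=dcchs)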
+
+print.twophase<-function(x,...){
+ cat("Two-phase design: ")
+ print(x$call)
+ cat("Phase 1:\n")
+ print(x$phase1$full)
+ cat("Phase 2:\n")
+ print(x$phase2)
+ invisible(x)
+}
+
+summary.twophase<-function(object,...){
+ class(object)<-"summary.twophase"
+ object
+}
+
+print.summary.twophase<-function(x,...,varnames=TRUE){
+ cat("Two-phase design: ")
+ print(x$call)
+ cat("Phase 1:\n")
+ print(x$phase1$full,design.summaries=TRUE,varnames=FALSE)
+ cat("Phase 2:\n")
+ print(x$phase2,design.summaries=TRUE, varnames=FALSE)
+ if (varnames){
+ cat("Data variables:\n")
+ print(names(x$phase1$full$variables))
+ }
+ invisible(x)
+}
+
+twophasevar<-function(x,design){
+ d1 <- design$phase1$sample
+ if (NROW(x)==length(design$usu)){
+ ph2pr<-design$usu
+ if (any(design$prob==Inf))
+ x[is.na(x)]<-0
+ }else{
+ x[is.na(x)]<-0
+ ph2pr<-rep(1,NROW(x))
+ ph2pr[design$subset]<-design$usu
+ }
+ ## compute phase 1 variance
+ vphase1 <- svyrecvar.phase1(x,d1$cluster, d1$strata, d1$fpc,
+ postStrata=d1$postStrata,
+ ph2prob=ph2pr,
+ nPSUfull=design$phase1$full$fpc$sampsize[design$subset,,drop=FALSE])
+
+ ## is phase 2 sampling whole phase 1 units or subsampling within units?
+ if (design$samescale)
+ u2<-x
+ else
+ u2<-x*sqrt(d1$prob)
+
+ u2[is.na(u2)]<-0
+ ## compute phase 2 variance
+ vphase2 <- with(design, svyrecvar(u2, phase2$cluster, phase2$strata,
+ phase2$fpc, postStrata=phase2$postStrata))
+ rval <- vphase1+vphase2
+ attr(rval, "phases")<-list(phase1=vphase1, phase2=vphase2)
+ rval
+}
+
+svyrecvar.phase1<-function(x, clusters, stratas, fpcs, postStrata=NULL,
+ lonely.psu=getOption("survey.lonely.psu"),
+ one.stage=getOption("survey.ultimate.cluster"),
+ ph2prob, nPSUfull){
+
+ x<-as.matrix(x)
+ cal<-NULL
+
+ ## FIXME: calibration of phase 1 not yet implemented.
+ ## Remove post-stratum means, which may cut across clusters
+ ## Also center the data using any "g-calibration" models
+ if(!is.null(postStrata)){
+ stop("calibration of phase 1 not yet implemented")
+ for (psvar in postStrata){
+ if (inherits(psvar, "greg_calibration")) {
+ if (psvar$stage==0){
+ ## G-calibration at population level
+ x<-qr.resid(psvar$qr,x/psvar$w)*psvar$w
+ } else {
+ ## G-calibration within clusters
+ cal<-c(cal, list(psvar))
+ }
+ } else {
+ ## ordinary post-stratification
+ psw<-attr(psvar, "weights")
+ postStrata<-as.factor(psvar)
+ psmeans<-rowsum(x/psw,psvar,reorder=TRUE)/as.vector(table(factor(psvar)))
+ x<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw
+ }
+ }
+ }
+
+ multistage.phase1(x, clusters,stratas,fpcs$sampsize, fpcs$popsize,
+ lonely.psu=getOption("survey.lonely.psu"),
+ one.stage=one.stage,stage=1,cal=cal,ph2prob=ph2prob,
+ nPSUfull=nPSUfull)
+}
+
+
+multistage.phase1<-function(x, clusters, stratas, nPSUs, fpcs,
+ lonely.psu=getOption("survey.lonely.psu"),
+ one.stage=FALSE,stage,cal,ph2prob, nPSUfull){
+
+ n<-NROW(x)
+
+
+ v <- onestage.phase1(x,stratas[,1], clusters[,1], nPSUs[,1],
+ fpcs[,1], lonely.psu=lonely.psu,stage=stage,cal=cal,
+ ph2prob=ph2prob, nPSUfull=nPSUfull[,1])
+
+ if (one.stage!=TRUE && !is.null(fpcs) && NCOL(clusters)>1) {
+ v.sub<-by(1:n, list(as.numeric(clusters[,1])), function(index){
+ ## residuals for G-calibration using population information
+ ## only on clusters at this stage.
+ for(cali in cal){
+ if (cali$stage != stage)
+ next
+ j<-match(clusters[index,1],cali$index)
+ if (length(unique(j))!=1)
+ stop("Internal problem in g-calibration data: stage",stage,
+ ", cluster", j)
+ j<-j[[1]]
+ x[index,]<-qr.resid(cali$qr[[j]], x[index,,drop=FALSE]/cali$w[[j]])*cali$w[[j]]
+ }
+ multistage.phase1(x[index,,drop=FALSE], clusters[index,-1,drop=FALSE],
+ stratas[index,-1,drop=FALSE], nPSUs[index,-1,drop=FALSE],
+ fpcs[index,-1,drop=FALSE],
+ lonely.psu=lonely.psu,one.stage=one.stage-1,
+ stage=stage+1,cal=cal,ph2prob=ph2prob[index],
+ nPSUfull=nPSUfull[index,-1,drop=FALSE])*nPSUfull[index[1],1]/fpcs[index[1],1]
+ })
+
+ for(i in 1:length(v.sub))
+ v<-v+v.sub[[i]]
+ }
+ v
+}
+
+
+onestrat.phase1<-function(x,cluster,nPSU,fpc, lonely.psu,stratum=NULL,
+ stage=1,cal,ph2prob, nPSUfull){
+ x<-rowsum(x, cluster)
+ ph2prob<-ph2prob[!duplicated(cluster)]
+
+ nsubset<-nrow(x)
+ if (nsubset<nPSU)
+ x<-rbind(x,matrix(0,ncol=ncol(x),nrow=nPSU-nrow(x)))
+
+ ph2prob<-c(ph2prob,rep(1,nPSU-nsubset))
+ xcenter<-colMeans(x*nPSU/nPSUfull)
+
+ x<-x*ph2prob
+
+
+ if (is.null(fpc))
+ f<-1
+ else
+ f<-ifelse(fpc==Inf, 1, (fpc-nPSUfull)/fpc)
+
+ if (lonely.psu!="adjust" || nsubset>1 ||
+ (nPSU>1 && !getOption("survey.adjust.domain.lonely")))
+ x<-sweep(x, 2, xcenter, "-")
+
+ if (nPSU>1)
+ scale<-f*nPSUfull/(nPSUfull-1)
+ else
+ scale<-f
+
+ if (nsubset==1 && nPSU>1){
+ warning("Stratum (",stratum,") has only one PSU at stage ",stage)
+ if (lonely.psu=="average" && getOption("survey.adjust.domain.lonely"))
+ scale<-NA
+ }
+
+ if (nPSU>1){
+ return(crossprod(x/sqrt(ph2prob))*scale)
+ } else if (f<0.0000001) ## certainty PSU
+ return(0*crossprod(x/sqrt(ph2prob)))
+ else {
+ rval<-switch(lonely.psu,
+ certainty=scale*crossprod(x/sqrt(ph2prob)),
+ remove=scale*crossprod(x/sqrt(ph2prob)),
+ adjust=scale*crossprod(x/sqrt(ph2prob)),
+ average=NA*crossprod(x/sqrt(ph2prob)),
+ fail= stop("Stratum (",stratum,") has only one PSU at stage ",stage),
+ stop("Can't handle lonely.psu=",lonely.psu)
+ )
+ rval
+ }
+}
+
+
+onestage.phase1<-function(x, strata, clusters, nPSU, fpc,
+ lonely.psu=getOption("survey.lonely.psu"),stage=0,
+ cal,ph2prob, nPSUfull){
+ stratvars<-tapply(1:NROW(x), list(factor(strata)), function(index){
+ onestrat.phase1(x[index,,drop=FALSE], clusters[index],
+ nPSU[index][1], fpc[index][1],
+ lonely.psu=lonely.psu,stratum=strata[index][1], stage=stage,cal=cal,
+ ph2prob=ph2prob[index], nPSUfull=nPSUfull[index][1])
+ })
+ p<-NCOL(x)
+ nstrat<-length(unique(strata))
+ nokstrat<-sum(sapply(stratvars,function(m) !any(is.na(m))))
+ apply(array(unlist(stratvars),c(p,p,length(stratvars))),1:2,sum,na.rm=TRUE)*nstrat/nokstrat
+}
+
+
+svytotal.twophase<-function(x,design, na.rm=FALSE, deff=FALSE,...){
+
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,design$phase1$sample$variables,
+ na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ } else {
+ if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+ else {
+ if(is.data.frame(x) && any(sapply(x,is.factor))){
+ xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
+ cols<-sapply(xx,NCOL)
+ scols<-c(0,cumsum(cols))
+ cn<-character(sum(cols))
+ for(i in 1:length(xx))
+ cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-cn
+ }
+ }
+ }
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ if(length(nas)>length(design$prob))
+ x<-x[nas==0,,drop=FALSE]
+ else
+ x[nas>0,]<-0
+ }
+
+ N<-sum(1/design$prob)
+ total <- colSums(x/as.vector(design$prob),na.rm=na.rm)
+ class(total)<-"svystat"
+ attr(total, "var")<-v<-twophasevar(x/design$prob,design)
+ attr(total,"statistic")<-"total"
+
+ if (is.character(deff) || deff){
+ nobs<-NROW(design$cluster)
+ if (deff=="replace")
+ vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design)^2)*(N-nobs)/N
+ else
+ vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design)^2)
+ attr(total, "deff")<-v/vsrs
+ }
+
+
+ return(total)
+}
+
+svymean.twophase<-function(x,design, na.rm=FALSE,deff=FALSE,...){
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,design$phase1$sample$variables
+ ,na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ }
+ else {
+ if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+ else {
+ if(is.data.frame(x) && any(sapply(x,is.factor))){
+ xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
+ cols<-sapply(xx,NCOL)
+ scols<-c(0,cumsum(cols))
+ cn<-character(sum(cols))
+ for(i in 1:length(xx))
+ cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-cn
+ }
+ }
+ }
+
+
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ if(length(nas)>length(design$prob))
+ x<-x[nas==0,,drop=FALSE]
+ else
+ x[nas>0,]<-0
+ }
+
+ pweights<-1/design$prob
+ psum<-sum(pweights)
+ average<-colSums(x*pweights/psum)
+ x<-sweep(x,2,average)
+ v<-twophasevar(x*pweights/psum,design)
+ attr(average,"var")<-v
+ attr(average,"statistic")<-"mean"
+ class(average)<-"svystat"
+ if (is.character(deff) || deff){
+ nobs<-NROW(design$cluster)
+ if(deff=="replace"){
+ vsrs<-svyvar(x,design,na.rm=na.rm)/(nobs)
+ } else {
+ if(psum<nobs) {
+ vsrs<-NA*v
+ warning("Sample size greater than population size: are weights correctly scaled?")
+ } else{
+ vsrs<-svyvar(x,design,na.rm=na.rm)*(psum-nobs)/(psum*nobs)
+ }
+ }
+ attr(average, "deff")<-v/vsrs
+ }
+
+ return(average)
+}
+
+model.frame.twophase<-function(formula,phase=2,...){
+ if (phase==1)
+ formula$phase1$full$variables
+ else
+ formula$phase1$sample$variables
+}
+
+svyratio.twophase<-function(numerator=formula, denominator, design, separate=FALSE,na.rm=FALSE,formula,...){
+
+ if (separate){
+ strats<-sort(unique(design$phase2$strata[,1]))
+ if (!design$phase2$has.strata)
+ warning("Separate and combined ratio estimators are the same for unstratified designs")
+ rval<-list(ratios=lapply(strats,
+ function(s) {
+ tmp<-svyratio(numerator, denominator,
+ subset(design, design$phase2$strata[,1] %in% s),
+ separate=FALSE,...)
+ attr(tmp,"call")<-bquote(Stratum==.(s))
+ tmp}))
+ names(rval$ratios)<-strats
+
+ class(rval)<-c("svyratio_separate")
+ rval$call<-sys.call()
+ rval$strata<-strats
+ return(rval)
+ }
+
+ if (inherits(numerator,"formula"))
+ numerator<-model.frame(numerator,model.frame(design),na.action=na.pass)
+ else if(typeof(numerator) %in% c("expression","symbol"))
+ numerator<-eval(numerator, design$variables)
+ if (inherits(denominator,"formula"))
+ denominator<-model.frame(denominator,model.frame(design),na.action=na.pass)
+ else if(typeof(denominator) %in% c("expression","symbol"))
+ denominator<-eval(denominator, model.frame(design))
+
+ nn<-NCOL(numerator)
+ nd<-NCOL(denominator)
+
+ all<-cbind(numerator,denominator)
+ nas<-!complete.cases(all)
+ if (na.rm){
+ design<-design[!nas,]
+ all<-all[!nas,,drop=FALSE]
+ numerator<-numerator[!nas,,drop=FALSE]
+ denominator<-denominator[!nas,,drop=FALSE]
+ }
+ allstats<-svytotal(all, design)
+ rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/"))
+
+
+ vars<-matrix(ncol=nd,nrow=nn)
+ for(i in 1:nn){
+ for(j in 1:nd){
+ r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob)
+ vars[i,j]<-twophasevar(r*1/design$prob, design)
+ }
+ }
+ colnames(vars)<-names(denominator)
+ rownames(vars)<-names(numerator)
+ rval$var<-vars
+ attr(rval,"call")<-sys.call()
+ class(rval)<-"svyratio"
+ rval
+
+ }
+
+
+"[.twophase"<-function (x,i, ..., drop=TRUE){
+ if (!missing(i)){
+ if (is.calibrated(x$phase1$full) || is.calibrated(x$phase2) || !drop){
+ ## Set weights to zero: no memory saving possible
+ ## There should be an easier way to complement a subscript..
+ if (is.logical(i)){
+ x$prob[!i]<-Inf
+ x$phase2$prob[!i]<-Inf
+ } else if (is.numeric(i) && length(i)){
+ x$prob[-i]<-Inf
+ x$phase2$prob[-i]<-Inf
+ } else {
+ tmp<-x$prob[i,]
+ x$prob<-rep(Inf, length(x$prob))
+ x$prob[i,]<-tmp
+ }
+ index<-is.finite(x$prob)
+ psu<-!duplicated(x$phase2$cluster[index,1])
+ tt<-table(x$phase2$strata[index,1][psu])
+ if(any(tt==1)){
+ warning(sum(tt==1)," strata have only one PSU in this subset.")
+ }
+ } else {
+ ## subset everything.
+ x$prob<-x$prob[i]
+ if (is.logical(i))
+ x$subset[x$subset]<- i
+ else if (is.numeric(i) && length(i))
+ x$subset[which(x$subset)[-i]]<- FALSE
+ else
+ x$subset<-FALSE & x$subset
+ x$usu<-x$usu[i]
+ x$phase1$sample<-x$phase1$sample[i,...,drop=TRUE]
+ x$phase2<-x$phase2[i,...,drop=TRUE]
+ }
+ } else {
+ x$phase1$full<-x$phase1$full[,...]
+ x$phase1$sample<-x$phase1$sample[,...]
+ x$phase2<-x$phase2[,...]
+ }
+ x
+}
+
+dim.twophase<-function(x,...){
+ dim(x$phase1$sample$variables)
+}
+
+na.fail.twophase<-function(object,...){
+ tmp<-na.fail(object$phase1$sample$variables,...)
+ object
+}
+
+na.omit.twophase<-function(object,...){
+ tmp<-na.omit(object$phase1$sample$variables,...)
+ omit<-attr(tmp,"na.action")
+ if (length(omit)){
+ object<-object[-omit,]
+ object$phase1$sample$variables<-tmp
+ attr(object,"na.action")<-omit
+ }
+ object
+}
+
+na.exclude.twophase<-function(object,...){
+ tmp<-na.exclude(object$phase1$sample$variables,...)
+ exclude<-attr(tmp,"na.action")
+ if (length(exclude)){
+ object<-object[-exclude,]
+ object$phase1$sample$variables<-tmp
+ attr(object,"na.action")<-exclude
+ }
+ object
+}
+
+
+update.twophase<-function(object,...){
+
+ dots<-substitute(list(...))[-1]
+ newnames<-names(dots)
+
+ for(j in seq(along=dots)){
+ object$phase1$sample$variables[,newnames[j]]<-eval(dots[[j]], object$phase1$sample$variables, parent.frame())
+ object$phase1$full$variables[,newnames[j]]<-eval(dots[[j]], object$phase1$full$variables, parent.frame())
+ }
+
+ object$call<-sys.call(-1)
+ object
+}
+
+subset.twophase<-function(x,subset,...){
+ e <- substitute(subset)
+ r <- eval(e, x$phase1$sample$variables, parent.frame())
+ r <- r & !is.na(r)
+ x<-x[r,]
+ x$call<-sys.call(-1)
+ x
+}
+
+
+calibrate.twophase<-function(design, phase=2, formula, population,
+ calfun=c("linear","raking","logit","rrz"),...){
+
+ if (phase==1){
+ stop("phase 1 calibration not yet implemented")
+ phase1<-calibrate(design$phase1$full,formula, population, ...)
+ design$phase1$full<-phase1
+ design$phase1$sample<-phase1[design$subset,]
+
+ } else if(phase==2){
+
+ if (is.character(calfun)) calfun<-match.arg(calfun)
+ if (is.character(calfun) && calfun=="rrz"){
+ design<-estWeights(design, formula,...)
+ design$call<-sys.call(-1)
+ return(design)
+ }
+
+ if (missing(population) || is.null(population)){
+ ## calibrate to phase 1 totals
+ population<-colSums(model.matrix(formula,
+ model.frame(formula, design$phase1$full$variables)))
+ }
+
+ phase2<-design$phase2
+ phase2$variables<-design$phase1$sample$variables
+ phase2<-calibrate(phase2,formula,population,calfun=calfun,...)
+ g<-design$phase2$prob/phase2$prob
+ phase2$variables<-NULL
+ design$phase2<-phase2
+ design$usu<-design$usu/g
+
+ } else stop("`phase' must be 1 or 2")
+
+
+ if (length(design$phase1$sample$prob)==length(design$phase2$prob))
+ design$prob<-design$phase1$sample$prob*design$phase2$prob
+ else{
+ design$prob<-rep(Inf,length(design$phase1$sample$prob))
+    design$prob[design$subset]<-design$prob[design$subset]*design$phase2$prob
+ }
+
+ design$call<-sys.call(-1)
+
+ design
+
+}
+
+
+postStratify.twophase<-function(design, ...) {
+ stop("postStratify not implemented for two-phase designs. Use calibrate()")
+}
+
+estWeights<-function(data, formula, ...) UseMethod("estWeights")
+
+estWeights.twophase<-function(data, formula=NULL, working.model=NULL,...){
+
+ if (!xor(is.null(formula), is.null(working.model)))
+ stop("Must specify one of formula, working.model")
+
+ certainty<-rep(FALSE,nrow(data$phase1$full$variables))
+ certainty[data$subset]<-data$phase2$fpc$popsize==data$phase2$fpc$sampsize
+
+ if (!is.null(formula)){
+ ff<-data$subset~rhs
+ ff[[3]]<-formula[[2]]
+ if(!attr(terms(ff),"intercept")) stop("formula must have an intercept")
+
+ model<-glm(ff, data=data$phase1$full$variables, family=binomial(),
+ subset=!certainty, na.action=na.fail)
+ } else {
+ xx<-estfun(working.model)
+ model<-glm(data$subset~xx,family=binomial(), subset=!certainty, na.action=na.fail)
+ }
+ fitp<-as.numeric(certainty[data$subset])
+ fitp[!certainty[data$subset]]<-fitted(model)[data$subset[!certainty]]
+
+ g<- (1/fitp)/(1/data$phase2$prob)
+
+ mm<-model.matrix(model)[data$subset[!certainty],,drop=FALSE]
+
+ if (any(certainty)){
+ mm1<-matrix(0,ncol=ncol(mm)+1,nrow=sum(data$subset))
+ mm1[,1]<-as.numeric(certainty[data$subset])
+ mm1[!certainty[data$subset],-1]<-mm
+ mm<-mm1
+ }
+
+ whalf<-sqrt(1/data$phase2$prob)
+
+ caldata<-list(qr=qr(mm*whalf), w=g*whalf, stage=0, index=NULL)
+ class(caldata) <- c("greg_calibration","gen_raking")
+
+ data$phase2$prob<-fitp
+ data$usu<-data$usu/g
+ data$phase2$postStrata <- c(data$phase2$postStrata, list(caldata))
+
+ if (length(data$phase1$sample$prob)==length(data$phase2$prob))
+ data$prob<-data$phase1$sample$prob*data$phase2$prob
+ else{
+ data$prob<-rep(Inf,length(data$phase1$sample$prob))
+    data$prob[data$subset]<-data$prob[data$subset]*data$phase2$prob
+ }
+
+ data$call <- sys.call(-1)
+
+ data
+
+}
+
+
+estfun<-function(model,...) UseMethod("estfun")
+estfun.coxph<-function(model, ...) resid(model,"score")
+estfun.glm<-function(model){
+ xmat<-model.matrix(model)
+ residuals(model,"working")*model$weights*xmat
+}
+estfun.lm<-function(model,...){
+ model.matrix(model)*resid(model)
+}
+
+
+
+
+estWeights.data.frame<-function(data,formula=NULL, working.model=NULL,
+ subset=NULL, strata=NULL,...){
+
+ if (is.null(subset)){
+ subset<-complete.cases(data)
+ if (all(subset))
+ stop("No missing data.")
+ }
+
+ if(is.null(strata)){
+ des<-twophase(id=list(~1,~1), subset=subset, data=data)
+ } else{
+ des<-twophase(id=list(~1,~1), subset=subset, data=data,
+ strata=list(NULL,strata))
+ }
+
+ rval<-estWeights(des,formula=formula,working.model=working.model)
+ rval$call<-sys.call(-1)
+ rval
+
+}
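+
+## Sketch: reweight complete cases for item nonresponse (hypothetical data
+## frame 'df' in which x is fully observed and y is partly missing):
+##   des <- estWeights(df, formula=~x)
+##   svymean(~y, des)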
diff --git a/R/twophase2.R b/R/twophase2.R
new file mode 100644
index 0000000..057a485
--- /dev/null
+++ b/R/twophase2.R
@@ -0,0 +1,682 @@
+##
+## Constructing cov(R_i,R_j)/pi^*_ij, or \check{\check{\Delta}}_ij in Sarndal's notation
+## We use this form because it can be sparse and because it is easy to combine
+## multistage and multiphase sampling.
+##
+Dcheck_strat<-function(strata, prob){
+ strata<-as.numeric(strata) ## for ave()
+ n<-length(strata)
+ rval<-matrix(0, n,n)
+ sampsize<-ave(strata,strata,FUN=length)
+ strats<-unique(strata)
+ for(strat in strats){
+ these <- strata == strat
+ rval[these,these]<- -(1-prob[these])/(sampsize[these]-1)
+ }
+ diag(rval)<-(1-prob)
+ rval
+}
+
+Dcheck_multi<-function(id,strata,probs){
+ nstage<-NCOL(id)
+ rval<-matrix(0,NROW(id),NROW(id))
+ for(stage in 1:nstage){
+ uid<-!duplicated(id[,stage])
+ idx<-match(id[,stage],id[uid,stage])
+ this_stage<-Dcheck_strat(strata[uid,stage],probs[uid,stage])[idx,idx]
+ rval<- twophaseDcheck(rval, this_stage)
+ }
+ rval
+ }
+
+Dcheck_subset<-function(strata, subset, prob, withreplacement){
+ strata<-as.numeric(strata) ## for ave()
+ N<-length(strata)
+ n<-sum(subset)
+ rval<-matrix(0, n,n)
+ sampsize<-ave(strata,strata,FUN=length)
+ strats<-unique(strata)
+ if (!withreplacement){
+ for(strat in strats){
+ these <- strata == strat
+ ithese<-which(these)
+ rval[these[subset],these[subset]]<- -(1-prob[ithese[subset]])/(sampsize[ithese[subset]]-1)
+ }
+ }
+ diag(rval)<-(1-prob[subset])
+ rval
+ }
+
+Dcheck_multi_subset<-function(id,strata,subset,probs,withreplacement){
+ nstage<-NCOL(id)
+ n<-sum(subset)
+ rval<-matrix(0,n,n)
+ if (all(probs==1) && withreplacement)
+ return(as(diag(n),"sparseMatrix"))
+ for(stage in 1:nstage){
+ uid<-!duplicated(id[,stage])
+ insubset<-rowsum(as.integer(subset),id[,stage],reorder=FALSE)>0
+ idx<-match(id[subset,stage],id[subset,stage][uid])
+ this_stage<-Dcheck_subset(strata[uid,stage],insubset,probs[uid,stage],withreplacement)[idx,idx]
+ rval<- twophaseDcheck(rval, this_stage)
+ }
+ rval
+ }
+twophaseDcheck<-function(Dcheck1,Dcheck2){
+ as(-Dcheck1*Dcheck2+Dcheck1+Dcheck2,"sparseMatrix")
+}
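+
+## Why Dcheck1 + Dcheck2 - Dcheck1*Dcheck2: the two phases are sampled
+## independently, so a unit's overall inclusion probability is pi1*pi2 and,
+## on the diagonal,
+##   1 - pi1*pi2 = (1-pi1) + (1-pi2) - (1-pi1)*(1-pi2).
+## The same elementwise identity combines the off-diagonal
+## cov(R_i,R_j)/pi*_ij terms, which is what twophaseDcheck() implements.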
+
+make_covmat<-function(design1,design2,subset){
+ withreplacement<-is.null(design1$fpc$popsize)
+ phase1<-Dcheck_multi_subset(design1$cluster, design1$strata, subset, design1$allprob, withreplacement)
+ phase2<-Dcheck_multi(design2$cluster, design2$strata, design2$allprob)
+ dcheck<-twophaseDcheck(phase1,phase2)
+ list(phase1=phase1,phase2=phase2,full=dcheck)
+}
+
+##
+## Based on twophase(), so it computes some stuff that is no longer necessary.
+## Will be pruned in the future.
+##
+twophase2<-function(id,strata=NULL, probs=NULL, fpc=NULL,
+ subset, data){
+
+ d1<-svydesign(ids=id[[1]],strata=strata[[1]],weights=NULL,
+ probs=probs[[1]],fpc=fpc[[1]],data=data)
+
+ if(inherits(subset,"formula"))
+ subset<-eval.parent(model.frame(subset,data=data,na.action=na.pass))[[1]]
+
+  if(!is.logical(subset) && all(subset %in% c(0,1)))
+ subset<-as.logical(subset)
+
+ if (any(is.na(subset))) stop("missing values in 'subset'")
+
+ d1s<-svydesign(ids=id[[1]],strata=strata[[1]],weights=NULL,
+ probs=probs[[1]],fpc=fpc[[1]],data=data[subset,])
+ d1s$prob<-d1$prob[subset]
+ d1s$allprob<-d1$allprob[subset,,drop=FALSE]
+
+ ## work out phase-two fpc
+ if (is.null(fpc[[2]])){
+ complete.vars<-names(data)[apply(data, 2, function(v) all(!is.na(v)))]
+ if (all(c(all.vars(id[[2]]), all.vars(strata[[2]])) %in% complete.vars)){
+ dfpc<-svydesign(ids=id[[2]], strata=strata[[2]], data=data, probs=NULL)
+ popsize<-mapply(function(s,i) ave(!duplicated(i),s,FUN=sum), dfpc$strata, dfpc$cluster)
+ rm(dfpc)
+ } else {
+ warning("Second-stage fpc not specified and not computable")
+ popsize<-NULL
+ }
+ } else popsize<-NULL
+
+ d2<-svydesign(ids=id[[2]], strata=strata[[2]], probs=probs[[2]],
+ weights=NULL, fpc=fpc[[2]], data=data[subset,])
+
+ ## ugly hack to get nicer labels
+ if(!is.null(fpc[[2]])){
+ d2call<-bquote(svydesign(ids=.(id[[2]]),strata=.(strata[[2]]), probs=.(probs[[2]]),
+ fpc=.(fpc[[2]])))
+ } else{
+ d2call<-bquote(svydesign(ids=.(id[[2]]),strata=.(strata[[2]]), probs=.(probs[[2]]),
+ fpc=`*phase1*`))
+ }
+ for(i in names(d2call)[-1])
+ d2call[[i]]<-d2call[[i]]
+ d2$call<-d2call
+ d1call<-bquote(svydesign(ids=.(id[[1]]), strata=.(strata[[1]]), probs=.(probs[[1]]),
+ fpc=.(fpc[[1]])))
+ for(i in names(d1call)[-1])
+ d1call[[i]]<-d1call[[i]]
+ d1$call<-d1call
+
+
+ ## Add phase 2 fpc and probs if they were computed rather than specified.
+ if (!is.null(popsize))
+ d2$fpc<-as.fpc(popsize[subset,,drop=FALSE],d2$strata,d2$cluster)
+ if(is.null(probs[[2]]) && !is.null(d2$fpc$popsize)){
+ d2$allprob<-1/weights(d2$fpc,final=FALSE)
+ d2$prob<-apply(as.data.frame(d2$allprob),1,prod)
+ }
+
+ d2$variables<-NULL
+ deltacheck<-make_covmat(d1,d2, subset)
+
+ rval<-list(phase1=list(full=d1,sample=d1s),
+ phase2=d2,
+ subset=subset, dcheck=deltacheck)
+ rval$prob<-rval$phase1$sample$prob
+
+ ## Are phase 2 PSUs the same as Phase 1 USUs, or smaller?
+ rval$samescale<- !any(duplicated(d1s$cluster[,NCOL(d1s$cluster)][!duplicated(d2$cluster[,1])]))
+
+ ## For each phase 1 sampling unit, need probability of being represented
+ ## at phase 2.
+ nunique<-function(x) sum(!duplicated(x))
+ m<-NCOL(rval$phase1$sample$cluster)
+ if(d2$has.strata){
+ if (inherits(strata[[2]],"formula"))
+ sa<-eval(attr(terms(strata[[2]]),"variables")[[2]],d1$variables)
+ else
+ sa<-d1$strata[,1]
+ cm<-rval$phase1$full$cluster[,m]
+ if (nunique(sa)!=nunique(sa[subset]))
+ stop("Some phase-2 strata have zero sampling fraction")
+ rval$usu<-ave(cm[subset],sa[subset],FUN=nunique)/ave(cm,sa,FUN=nunique)[subset]
+ } else {
+ rval$usu<-drop(with(rval$phase1$sample,ave(cluster[,m], strata[,m], FUN=nunique))/rval$phase1$full$fpc$sampsize[rval$subset])
+ }
+
+ if (length(rval$phase1$sample$prob)==length(d2$prob))
+ rval$prob<-rval$phase1$sample$prob*d2$prob
+ else{
+ rval$prob<-rep(Inf,length(rval$phase1$sample$prob))
+ rval$prob[subset]<-rval$prob[subset]*d2$prob
+ }
+ rval$call<-sys.call()
+ class(rval) <- c("twophase2","survey.design")
+ rval
+}
+
+print.twophase2<-function(x,...){
+ cat("Two-phase sparse-matrix design:\n ")
+ print(x$call)
+ cat("Phase 1:\n")
+ print(x$phase1$full)
+ cat("Phase 2:\n")
+ print(x$phase2)
+ invisible(x)
+}
+
+summary.twophase2<-function(object,...){
+ class(object)<-"summary.twophase2"
+ object
+}
+
+print.summary.twophase2<-function(x,...,varnames=TRUE){
+ cat("Two-phase sparse-matrix design:\n ")
+ print(x$call)
+ cat("Phase 1:\n")
+ print(x$phase1$full,design.summaries=TRUE,varnames=FALSE)
+ cat("Phase 2:\n")
+ print(x$phase2,design.summaries=TRUE, varnames=FALSE)
+ if (varnames){
+ cat("Data variables:\n")
+ print(names(x$phase1$full$variables))
+ }
+ invisible(x)
+}
+
+twophase2var<-function(x,design){
+ ## calibration is allowed at phase one or phase two,
+ ## but not for clusters within a phase
+ postStrata2<-design$phase2$postStrata
+ postStrata1<-design$phase1$full$postStrata
+ if (is.null(postStrata1) && is.null(postStrata2)){
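+ ## decompose the total variance into a phase-1 contribution (full-design
+ ## variance minus the phase-2 term) and a phase-2 contribution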
+ rval<-htvar.matrix(x,design$dcheck$full)
+ ph2<-htvar.matrix(x,design$dcheck$phase2)
+ attr(rval,"phases")<-list(phase1=rval-ph2,phase2=ph2)
+ return(rval)
+ }
+ if (!is.null(postStrata1)){
+ ##phase 1 calibration
+ ## x is the size of the phase-2 sample; need to expand to allow calibration.
+ y<-matrix(0,ncol=ncol(x),nrow=length(design$subset))
+ y[design$subset,]<-x
+ for (psvar in postStrata1){
+ if (inherits(psvar, "greg_calibration")) {
+ if (psvar$stage==0){
+ ## G-calibration at population level
+ y<-qr.resid(psvar$qr,y/psvar$w)*psvar$w
+ } else {
+ ## G-calibration within clusters
+ stop("calibration within clusters not allowed for two-phase designs")
+ }
+ } else {
+ ## ordinary post-stratification
+ psw<-attr(psvar, "weights")
+ postStrata<-as.factor(psvar)
+ psmeans<-rowsum(y/psw,psvar,reorder=TRUE)/as.vector(table(factor(psvar)))
+ y<- y-psmeans[match(psvar,sort(unique(psvar))),]*psw
+ }
+ }
+ x1<-y[design$subset,,drop=FALSE]
+ } else x1<-x
+ phase1var<-htvar.matrix(x1,design$dcheck$full)-htvar.matrix(x1,design$dcheck$phase2)
+ if (!is.null(postStrata2)){
+ ##phase 2 calibration
+ for (psvar in postStrata2){
+ if (inherits(psvar, "greg_calibration")) {
+ if (psvar$stage==0){
+ ## G-calibration at population level
+ x2<-qr.resid(psvar$qr,x/psvar$w)*psvar$w
+ } else {
+ ## G-calibration within clusters
+ stop("calibration within clusters not allowed for two-phase designs")
+ }
+ } else {
+ ## ordinary post-stratification
+ psw<-attr(psvar, "weights")
+ postStrata<-as.factor(psvar)
+ psmeans<-rowsum(x/psw,psvar,reorder=TRUE)/as.vector(table(factor(psvar)))
+ x2<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw
+ }
+ }
+ } else x2<-x
+ phase2var<-htvar.matrix(x2,design$dcheck$phase2)
+ rval<-phase1var+phase2var
+ attr(rval,"phases")<-list(phase1=phase1var,phase2=phase2var)
+ rval
+}
+
+svytotal.twophase2<-function(x,design, na.rm=FALSE, deff=FALSE,...){
+
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,design$phase1$sample$variables,
+ na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ } else {
+ if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+ else {
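+ ## expand any factor columns of a data-frame argument into 0/1 indicator columns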
+ if(is.data.frame(x) && any(sapply(x,is.factor))){
+ xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
+ cols<-sapply(xx,NCOL)
+ scols<-c(0,cumsum(cols))
+ cn<-character(sum(cols))
+ for(i in 1:length(xx))
+ cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-cn
+ }
+ }
+ }
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ design<-design[nas==0,]
+ if(length(nas)>length(design$prob))
+ x<-x[nas==0,,drop=FALSE]
+ else
+ x[nas>0,]<-0
+ }
+
+ N<-sum(1/design$prob)
+ total <- colSums(x/as.vector(design$prob),na.rm=na.rm)
+ class(total)<-"svystat"
+ attr(total, "var")<-v<-twophase2var(x/design$prob,design)
+ attr(total,"statistic")<-"total"
+
+ if (is.character(deff) || deff){
+ nobs<-NROW(design$cluster)
+ if (deff=="replace")
+ vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2*(N-nobs)/N
+ else
+ vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2
+ attr(total, "deff")<-v/vsrs
+ }
+
+
+ return(total)
+}
+
+svymean.twophase2<-function(x,design, na.rm=FALSE,deff=FALSE,...){
+
+ if (inherits(x,"formula")){
+ ## do the right thing with factors
+ mf<-model.frame(x,design$phase1$sample$variables
+ ,na.action=na.pass)
+ xx<-lapply(attr(terms(x),"variables")[-1],
+ function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
+ cols<-sapply(xx,NCOL)
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ scols<-c(0,cumsum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-do.call("c",lapply(xx,colnames))
+ }
+ else {
+ if(typeof(x) %in% c("expression","symbol"))
+ x<-eval(x, design$variables)
+ else {
+ if(is.data.frame(x) && any(sapply(x,is.factor))){
+ xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
+ cols<-sapply(xx,NCOL)
+ scols<-c(0,cumsum(cols))
+ cn<-character(sum(cols))
+ for(i in 1:length(xx))
+ cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
+ x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
+ for(i in 1:length(xx)){
+ x[,scols[i]+1:cols[i]]<-xx[[i]]
+ }
+ colnames(x)<-cn
+ }
+ }
+ }
+
+
+ x<-as.matrix(x)
+
+ if (na.rm){
+ nas<-rowSums(is.na(x))
+ if (any(nas>0))
+ design<-design[nas==0,]
+ x[nas>0,]<-0
+ }
+
+ pweights<-1/design$prob
+ psum<-sum(pweights)
+ average<-colSums(x*pweights/psum)
+ x<-sweep(x,2,average)
+ v<-twophase2var(x*pweights/psum,design)
+ attr(average,"var")<-v
+ attr(average,"statistic")<-"mean"
+ class(average)<-"svystat"
+ if (is.character(deff) || deff){
+ nobs<-nrow(design)
+ if(deff=="replace"){
+ vsrs<-svyvar(x,design,na.rm=na.rm)/(nobs)
+ } else {
+ if(psum<nobs) {
+ vsrs<-NA*v
+ warning("Sample size greater than population size: are weights correctly scaled?")
+ } else{
+ vsrs<-svyvar(x,design,na.rm=na.rm)*(psum-nobs)/(psum*nobs)
+ }
+ }
+ attr(average, "deff")<-v/vsrs
+ }
+
+ return(average)
+}
+
+model.frame.twophase2<-function(formula,phase=2,...){
+ if (phase==1)
+ formula$phase1$full$variables
+ else
+ formula$phase1$sample$variables
+}
+
+svyratio.twophase2<-function(numerator=formula, denominator, design, separate=FALSE,na.rm=FALSE,formula,...){
+
+ if (separate){
+ strats<-sort(unique(design$phase2$strata[,1]))
+ if (!design$phase2$has.strata)
+ warning("Separate and combined ratio estimators are the same for unstratified designs")
+ rval<-list(ratios=lapply(strats,
+ function(s) {
+ tmp<-svyratio(numerator, denominator,
+ subset(design, design$phase2$strata[,1] %in% s),
+ separate=FALSE,...)
+ attr(tmp,"call")<-bquote(Stratum==.(s))
+ tmp}))
+ names(rval$ratios)<-strats
+
+ class(rval)<-c("svyratio_separate")
+ rval$call<-sys.call()
+ rval$strata<-strats
+ return(rval)
+ }
+
+ if (inherits(numerator,"formula"))
+ numerator<-model.frame(numerator,model.frame(design),na.action=na.pass)
+ else if(typeof(numerator) %in% c("expression","symbol"))
+ numerator<-eval(numerator, design$variables)
+ if (inherits(denominator,"formula"))
+ denominator<-model.frame(denominator,model.frame(design),na.action=na.pass)
+ else if(typeof(denominator) %in% c("expression","symbol"))
+ denominator<-eval(denominator, model.frame(design))
+
+ nn<-NCOL(numerator)
+ nd<-NCOL(denominator)
+
+ all<-cbind(numerator,denominator)
+ nas<-!complete.cases(all)
+ if (na.rm){
+ design<-design[!nas,]
+ all<-all[!nas,,drop=FALSE]
+ numerator<-numerator[!nas,,drop=FALSE]
+ denominator<-denominator[!nas,,drop=FALSE]
+ }
+ allstats<-svytotal(all, design)
+ rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/"))
+
+
+ vars<-matrix(ncol=nd,nrow=nn)
+ for(i in 1:nn){
+ for(j in 1:nd){
+ r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob)
+ vars[i,j]<-twophase2var(r*1/design$prob, design)
+ }
+ }
+ colnames(vars)<-names(denominator)
+ rownames(vars)<-names(numerator)
+ rval$var<-vars
+ attr(rval,"call")<-sys.call()
+ class(rval)<-"svyratio"
+ rval
+
+ }
+
+
+"[.twophase2"<-function (x,i, ..., drop=TRUE){
+ if (!missing(i)){
+ ## Set weights to zero: don't try to save memory
+ ## There should be an easier way to complement a subscript...
+ if (is.logical(i) && any(!i)){
+ ## logical indexing: use !
+ x$prob[!i]<-Inf
+ x$phase2$prob[!i]<-Inf
+ x$dcheck<-lapply(x$dcheck, function(m) {m[!i,!i]<-0; m})
+ } else if (is.numeric(i) && length(i)){
+ ## numeric indexing: use -
+ x$prob[-i]<-Inf
+ x$phase2$prob[-i]<-Inf
+ x$dcheck<-lapply(x$dcheck, function(m) {m[-i,-i]<-0;m})
+ } else if (is.character(i)){
+ ## character indexing: use brute force and ignorance
+ tmp<-x$prob[i]
+ x$prob[]<-Inf
+ x$prob[i]<-tmp
+ tmp<-x$phase2$prob[i]
+ x$phase2$prob[]<-Inf
+ x$phase2$prob[i]<-tmp
+ x$dcheck<-lapply(x$dcheck, function(m) {n<-Matrix(0,ncol(m),ncol(m)); n[i,i]<-m[i,i]; n})
+ }
+ index<-is.finite(x$prob)
+ psu<-!duplicated(x$phase2$cluster[index,1])
+ tt<-table(x$phase2$strata[index,1][psu])
+ if(any(tt==1)){
+ warning(sum(tt==1)," strata have only one PSU in this subset.")
+ }
+ } else {
+ x$phase1$full<-x$phase1$full[,...]
+ x$phase1$sample<-x$phase1$sample[,...]
+ x$phase2<-x$phase2[,...]
+ }
+ x
+}
+
+dim.twophase2<-function(x,...){
+ dim(x$phase1$sample$variables)
+}
+
+degf.twophase2<-function(design,...) degf(design$phase2)
+
+na.fail.twophase2<-function(object,...){
+ tmp<-na.fail(object$phase1$sample$variables,...)
+ object
+}
+
+na.omit.twophase2<-function(object,...){
+ tmp<-na.omit(object$phase1$sample$variables,...)
+ omit<-attr(tmp,"na.action")
+ if (length(omit)){
+ object<-object[-omit,]
+ object$phase1$sample$variables<-tmp
+ attr(object,"na.action")<-omit
+ }
+ object
+}
+
+na.exclude.twophase2<-function(object,...){
+ tmp<-na.exclude(object$phase1$sample$variables,...)
+ exclude<-attr(tmp,"na.action")
+ if (length(exclude)){
+ object<-object[-exclude,]
+ object$phase1$sample$variables<-tmp
+ attr(object,"na.action")<-exclude
+ }
+ object
+}
+
+
+update.twophase2<-function(object,...){
+
+ dots<-substitute(list(...))[-1]
+ newnames<-names(dots)
+
+ for(j in seq(along=dots)){
+ object$phase1$sample$variables[,newnames[j]]<-eval(dots[[j]], object$phase1$sample$variables, parent.frame())
+ object$phase1$full$variables[,newnames[j]]<-eval(dots[[j]], object$phase1$full$variables, parent.frame())
+ }
+
+ object$call<-sys.call(-1)
+ object
+}
+
+subset.twophase2<-function(x,subset,...){
+ e <- substitute(subset)
+ r <- eval(e, x$phase1$sample$variables, parent.frame())
+ r <- r & !is.na(r)
+ x<-x[r,]
+ x$call<-sys.call(-1)
+ x
+}
+
+
+calibrate.twophase2<-function(design, phase=2, formula, population,
+ calfun=c("linear","raking","logit","rrz"),...){
+
+ if (phase==1){
+ phase1<-calibrate(design$phase1$full,formula, population, ...)
+ design$phase1$full<-phase1
+ design$phase1$sample<-phase1[design$subset,]
+
+ } else if(phase==2){
+
+ if (is.character(calfun)) calfun<-match.arg(calfun)
+ if (is.character(calfun) && calfun=="rrz"){
+ design<-estWeights(design, formula,...)
+ design$call<-sys.call(-1)
+ return(design)
+ }
+
+ if (missing(population) || is.null(population)){
+ ## calibrate to phase 1 totals
+ population<-colSums(model.matrix(formula,
+ model.frame(formula, design$phase1$full$variables)))
+ }
+
+ phase2<-design$phase2
+ phase2$variables<-design$phase1$sample$variables
+ phase2<-calibrate(phase2,formula,population,calfun=calfun,...)
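+ ## g: calibration weight-adjustment factor (old phase-2 probabilities over
+ ## new ones); the phase-2 representation probabilities usu are rescaled by it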
+ g<-design$phase2$prob/phase2$prob
+ phase2$variables<-NULL
+ design$phase2<-phase2
+ design$usu<-design$usu/g
+
+ } else stop("`phase' must be 1 or 2")
+
+
+ if (length(design$phase1$sample$prob)==length(design$phase2$prob))
+ design$prob<-design$phase1$sample$prob*design$phase2$prob
+ else{
+ design$prob<-rep(Inf,length(design$phase1$sample$prob))
+ design$prob[design$subset]<-design$prob[design$subset]*design$phase2$prob
+ }
+
+ design$call<-sys.call(-1)
+
+ design
+
+}
+
+
+postStratify.twophase2<-function(design, ...) {
+ stop("postStratify not yet implemented for two-phase designs. Use calibrate()")
+}
+
+
+estWeights.twophase2<-function(data, formula=NULL, working.model=NULL,...){
+
+ if (!xor(is.null(formula), is.null(working.model)))
+ stop("Must specify one of formula, working.model")
+
+ certainty<-rep(FALSE,nrow(data$phase1$full$variables))
+ certainty[data$subset]<-data$phase2$fpc$popsize==data$phase2$fpc$sampsize
+
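+ ## Fit a logistic working model for the probability of phase-2 membership
+ ## (Robins-Rotnitzky-Zhao-style reweighting); certainty units keep probability 1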
+ if (!is.null(formula)){
+ ff<-data$subset~rhs
+ ff[[3]]<-formula[[2]]
+ if(!attr(terms(ff),"intercept")) stop("formula must have an intercept")
+
+ model<-glm(ff, data=data$phase1$full$variables, family=binomial(),
+ subset=!certainty, na.action=na.fail)
+ } else {
+ xx<-estfun(working.model)
+ model<-glm(data$subset~xx,family=binomial(), subset=!certainty, na.action=na.fail)
+ }
+ fitp<-as.numeric(certainty[data$subset])
+ fitp[!certainty[data$subset]]<-fitted(model)[data$subset[!certainty]]
+
+ g<- (1/fitp)/(1/data$phase2$prob)
+
+ mm<-model.matrix(model)[data$subset[!certainty],,drop=FALSE]
+
+ if (any(certainty)){
+ mm1<-matrix(0,ncol=ncol(mm)+1,nrow=sum(data$subset))
+ mm1[,1]<-as.numeric(certainty[data$subset])
+ mm1[!certainty[data$subset],-1]<-mm
+ mm<-mm1
+ }
+
+ whalf<-sqrt(1/data$phase2$prob)
+
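+ ## package the reweighting as a greg_calibration object so twophase2var()
+ ## applies the usual calibration correction when estimating variances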
+ caldata<-list(qr=qr(mm*whalf), w=g*whalf, stage=0, index=NULL)
+ class(caldata) <- c("greg_calibration","gen_raking")
+
+ data$phase2$prob<-fitp
+ data$usu<-data$usu/g
+ data$phase2$postStrata <- c(data$phase2$postStrata, list(caldata))
+
+ if (length(data$phase1$sample$prob)==length(data$phase2$prob))
+ data$prob<-data$phase1$sample$prob*data$phase2$prob
+ else{
+ data$prob<-rep(Inf,length(data$phase1$sample$prob))
+ data$prob[data$subset]<-data$prob[data$subset]*data$phase2$prob
+ }
+
+ data$call <- sys.call(-1)
+
+ data
+
+}
+
+
diff --git a/R/weightconstruction.R b/R/weightconstruction.R
new file mode 100755
index 0000000..bec352a
--- /dev/null
+++ b/R/weightconstruction.R
@@ -0,0 +1,217 @@
+##
+## Functions for constructing nonresponse weights
+##
+##
+## nonresponse(): constructor
+##
+## sparseCells(): identify cells with low count, high non-response weight, high total weight.
+##
+## neighbours(): find neighbours of specified cells
+##
+## joinCells(): combine two cells.
+##
+## reweight(): take a nonresponse object and a set of pweights or a survey design, and
+## produce an updated set of weights or design
+##
+## weights(): extract nonresponse weights
+##
+## After constructing an object with nonresponse(), use sparseCells() to find
+## cells that need combining, neighbours() to find things to combine them with,
+## and joinCells() to combine them. Rinse and repeat.
+## Use weights() to extract the final weights.
+##
+
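+## A minimal sketch of the workflow (the 2x2 tables and the count cutoff are
+## hypothetical, purely for illustration):
+##
+## sw <- array(c(100, 80, 120, 90), dim=c(2,2),
+## dimnames=list(age=c("young","old"), sex=c("M","F")))
+## sc <- array(c(10, 4, 12, 2), dim=dim(sw), dimnames=dimnames(sw))
+## pop <- array(c(150, 130, 160, 180), dim=dim(sw), dimnames=dimnames(sw))
+## nr <- nonresponse(sw, sc, pop) # sample weights, counts, population totals
+## (bad <- sparseCells(nr, count=5)) # cells with low count or high NR weight
+## neighbours(bad$index[1], nr) # adjacent cells they could be merged with
+## nr <- joinCells(nr, bad$index[1], bad$index[2])
+## weights(nr) # final nonresponse weights
+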
+
+nonresponse<-function(sample.weights,sample.counts,population){
+
+ if (!isTRUE(all.equal(dimnames(sample.weights),dimnames(sample.counts))))
+ stop("Counts and weights have different dimensions")
+ if (!isTRUE(all.equal(dimnames(sample.weights),dimnames(population))))
+ stop("Sample and population dimensions do not match")
+
+ index<-array(1:length(sample.weights),dim=dim(sample.weights),
+ dimnames=dimnames(sample.weights))
+
+ rval<-list(weights=sample.weights,
+ counts=sample.counts,
+ population=population,
+ call=sys.call(),
+ index=index,
+ joins=NULL)
+ class(rval)<-"nonresponse"
+ rval
+
+}
+
+
+print.nonresponse<-function(x,digits=max(3,getOption("digits")-4),...,max.print=5){
+ cat("Call: ")
+ print(x$call)
+ n<-length(x$index)
+ nunq<-length(unique(as.vector(x$index)))
+ cat(n,"original cells, ",nunq,"distinct cells remaining\n")
+ show.all<-nunq<=max.print
+ counts<-rowsum(as.vector(x$counts), as.vector(x$index),reorder=FALSE)
+ ratios<-rowsum(as.vector(x$population),as.vector(x$index),reorder=FALSE)/rowsum(as.vector(x$weights),as.vector(x$index),reorder=FALSE)
+ totals<-rowsum(as.vector(x$population),as.vector(x$index),reorder=FALSE)/counts
+
+ if(length(x$joins)){
+ cat("Joins:\n")
+ lapply(x$joins, cat,"\n")
+ }
+ if (show.all){
+ cat("Counts: ")
+ cat(as.vector(signif(counts,digits)))
+ cat("\nNR weights: ")
+ cat(as.vector(signif(ratios,digits)))
+ cat("\nTotal weights: ")
+ cat(as.vector(signif(totals,digits)),"\n")
+ } else{
+ print(summary(data.frame(counts=counts,NRweights=ratios,totalwts=totals)))
+ }
+ invisible(NULL)
+}
+
+"[.nonresponse"<-function(x,i,..){
+ expand<-as.integer(as.factor(x$index))
+
+ counts<-rowsum(as.vector(x$counts), as.vector(x$index),reorder=FALSE)
+
+ ratios<-rowsum(as.vector(x$population),as.vector(x$index),reorder=FALSE)/rowsum(as.vector(x$weights),as.vector(x$index),reorder=FALSE)
+
+ totals<-rowsum(as.vector(x$population),as.vector(x$index),reorder=FALSE)/counts
+
+ mm<-matrix(nrow=length(i),ncol=length(dim(x$index)))
+ ii<-i-1
+ for(j in 1:length(dim(x$index))){
+ mm[,j]<-dimnames(x$index)[[j]][(ii %% dim(x$index)[j])+1]
+ ii<-ii%/%(dim(x$index)[j])
+ }
+ colnames(mm)<-names(dimnames(x$index))
+ rownames(mm)<-i
+
+ rval<-list(index=as.vector(x$index)[i], names=mm, totals=totals[expand][i],ratios=ratios[expand][i],counts=counts[expand][i])
+ rval$call<-sys.call()
+ class(rval)<-"nonresponseSubset"
+ rval
+}
+
+print.nonresponseSubset<-function(x,digits=max(2, getOption("digits")-4),...){
+ print(x$call)
+ cat("Cells: ")
+ cat(x$index)
+ cat("\nIndices:\n")
+ print(x$names)
+ cat("Summary:\n")
+ mm<-cbind(NRwt=signif(x$ratios,digits),wt=signif(x$totals,digits),n=x$counts)
+ rownames(mm)<-x$index
+ prmatrix(mm)
+ invisible(NULL)
+}
+
+sparseCells<-function(object,count=0,totalweight=Inf,nrweight=1.5){
+
+ expand<-as.integer(as.factor(object$index))
+
+ counts<-rowsum(as.vector(object$counts), as.vector(object$index),reorder=FALSE)
+
+ ratios<-rowsum(as.vector(object$population),as.vector(object$index),reorder=FALSE)/rowsum(as.vector(object$weights),as.vector(object$index),reorder=FALSE)
+
+ totals<-rowsum(as.vector(object$population),as.vector(object$index),reorder=FALSE)/counts
+
+ bad<- (ratios[expand]>nrweight | counts[expand]<count | totals[expand]>totalweight)
+ i<-which(bad & !duplicated(as.vector(object$index)))
+ if (length(i)==0)
+ return(NULL)
+
+ d<-dim(object$weights)
+ nd<-length(d)
+ dd<-cumprod(d)
+ dn<-dimnames(object$weights)
+
+ ii<-t(t(outer(i-1,dd,"%%")) %/% c(1,dd[-nd])) +1
+ keep<-!duplicated(i)
+ ii<-ii[keep,,drop=FALSE]
+ i<-i[keep]
+
+ mm<-matrix("",ncol=NCOL(ii),nrow=NROW(ii))
+ colnames(mm)<-names(dimnames(object$index))
+ rownames(mm)<-i
+
+ for(j in seq(length=length(d))){
+ mm[,j]<-dn[[j]][ii[,j]]
+ }
+
+ rval<-list(index=i, names=mm, totals=totals[expand][i],ratios=ratios[expand][i],
+ counts=counts[expand][i],call=sys.call())
+ class(rval)<-"nonresponseSubset"
+ rval
+}
+
+
+neighbours<-function(index,object){
+
+ d<-dim(object$index)
+ if (length(index)==1)
+ i<-object$index[index]
+ else
+ i<-sum(c(index-1,0)*cumprod(c(1,d)))+1
+ nd<-length(d)
+
+ ## all the joins of that index
+ ii<-which(object$index==object$index[i],arr.ind=TRUE)
+ nii<-nrow(ii)
+ diffs<-t(rbind(diag(rep(1,nd)),diag(rep(-1,nd))))
+ nbours<-matrix(nrow=2*nd*nii,ncol=nd)
+ counter<-0
+ for(j in 1:nii){
+ nbours[counter+1:(2*nd),]<-t(ii[j,]+diffs)
+ counter<-counter+2*nd
+ }
+ keep<-apply(nbours,1, function(x) all(x>0 & x<=d))
+ keep<- keep & !duplicated(nbours)
+ nbours<-nbours[keep,,drop=FALSE]
+
+ nbour.index<-apply(nbours,1, function(x) sum(c(x-1,0)*cumprod(c(1,d)))+1)
+
+ nbour.index<-nbour.index[!(object$index[nbour.index] %in% object$index[index])]
+
+ object[nbour.index]
+}
+
+
+
+weights.nonresponse<-function(object,...){
+ w<-rowsum(as.vector(object$population),as.vector(object$index))/rowsum(as.vector(object$weights),as.vector(object$index))
+ expand<-as.integer(as.factor(object$index))
+ array(w[expand],dim=dim(object$index),dimnames=dimnames(object$index))
+}
+
+joinCells<-function(object,a,...){
+ if (!is.list(a))
+ a<-list(a,...)
+
+ d<-dim(object$index)
+ nd<-length(d)
+ if (length(a[[1]])>1)
+ a<-sapply(a,function(ai) sum(c(ai-1,0)*cumprod(c(1,d)))+1)
+ else
+ a<-do.call("c",a)
+ nd<-length(d)
+
+ if(length(a)<2){
+ warning("Can't join a single cell")
+ return(invisible(object))
+ }
+
+ indices<-object$index[a]
+ if (length(unique(indices))<2){
+ warning("These cells are already joined")
+ return(invisible(object))
+ }
+
+ object$index[object$index %in% object$index[a]]<-min(object$index[a])
+ object$joins<-c(object$joins,list(which(object$index %in% object$index[a])))
+ object
+
+}
diff --git a/THANKS b/THANKS
new file mode 100644
index 0000000..188639e
--- /dev/null
+++ b/THANKS
@@ -0,0 +1,78 @@
+The survey package has benefited from code, bug reports, and feature
+requests from users, including:
+ Daniel Almirall
+ Giuseppe Antonaci
+ Brad Biggerstaff
+ Milan Bouchet-Valat
+ Norman Breslow
+ Arthur Burke
+ Ron Burns
+ Stefano Calza
+ Shelby Chartkoff
+ David Collins
+ Anthony Damico
+ Wade Davis
+ Michael Donohue
+ Rex Dwyer
+ Melanie Edwards
+ Francisco Fabuel
+ Bob Fay
+ Daniel Fernandes
+ Ben French
+ Jacques Ferrez
+ Daniella Gollinelli
+ Alistair Gray
+ James Greiner
+ Zachary Grinspan
+ Alois Haslinger
+ Kieran Healy
+ Lucia Hindorff
+ Jeffery Hughes
+ Ward Kinkade
+ Stas Kolenikov
+ Scott Kostyshak
+ Alex Kowarik
+ Michael Laviolette
+ Jean Paul Lucas
+ Lisa McShane
+ Ana Patricia Martins
+ Tapan Mehta
+ Daryl Morris
+ Joey Morris
+ Kirill Mueller
+ Jean Opsomer
+ Gonzalo Perez
+ Djalma Pessoa
+ Ana Quiterio
+ Gillian Raab
+ Christian Raschke
+ Brian Ripley
+ Fred Rohde
+ Steve Roberts
+ Andrew Robinson
+ Mark Rosenstein
+ Lee Sieswerda
+ Phil Smith
+ Matthew Soldner
+ Corey Sparks
+ Takahiro Tsuchiya
+ Richard Valliant
+ Tobias Verbeke
+ Renzo Vettori
+ Thomas Yokota
+ Diego Zardetto
+ Alan Zaslavsky
+and from discussions with Chris Wild and Alastair Scott.
+
+Some work on the package while I was on sabbatical was funded by
+Fondation Leducq through a grant to the LINAT collaboration and by the
+Marsden Fund through a grant to the University of Auckland.
+
+Patrick Aboyoun did the S-PLUS port of version 3.6-8
+
+John Preston provided an example of the multistage rescaled bootstrap for testing.
+
+The svyolr() code and the likelihood-based confidence intervals are largely ripped from
+the MASS package of Venables & Ripley, and parts of the code for svycoxph() are modified
+from Therneau's survival package.
+
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..c6270a1
--- /dev/null
+++ b/TODO
@@ -0,0 +1,57 @@
+Possible additional developments in no particular order:
+---------------------------------------------------------
+
+ [done?] options to handle quantiles in rounded continuous data like SUDAAN
+ does (if I can work out what it does).
+
+ - Score-based confidence intervals for glms
+
+ [mostly done] Rao-Scott-type tests for glms.
+
+[done] AIC/BIC
+
+- better choice of denominator df for tests
+
+ [done] More general HT estimator for sampling without replacement
+ - using joint inclusion probabilities
+ - using population first-order inclusion probabilities and Hartley-Rao approximation
+ - using Overton's approximation and sample inclusion probabilities
+ - using Berger's approximation and sample inclusion probabilities.
+
+ - GEE (you can do the independence working model just as another level of clustering).
+
+ - an interface to the Auckland semiparametric approach to two-phase designs.
+
+ - Parametric survival models (you can do these with svymle)
+
+ - linear mixed models?
+ - simpler case: multilevel model following sampling design
+ - nested model exactly matching design
+ - design is simpler than model
+ - interesting case: separate model and sampling designs.
+ - cluster sampling, so that higher-order sampling probabilities are available
+ - more general sampling.
+
+ [done] standard errors for survival curves, particularly in two-phase studies
+
+ - an interface to twophase and calibrate for IPTW estimation.
+
+ - Replicate weights for two-phase designs
+ [done] and for multistage designs with large sampling fraction (Fukuoka's BBE?)
+
+ [experimental] parallel processing for replicate weights, svyby(), svyrecvar()?
+
+ - Gini index and other summaries of concentration and inequality.
+
+ - Krista Giles' respondent-driven sampling estimators?
+
+ [done] database-backed designs for replicate weights.
+
+ - Multivariate statistics
+ [done] principal components
+ [experimental] factor analysis
+ [done in lavaan.survey package] SEMs?
+
+########## things to fix ################
+
+Use naresid() to get better missing-value handling in svyglm, panel.svysmooth, etc
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..5b6cc55
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/data/api.rda b/data/api.rda
new file mode 100644
index 0000000..571936e
Binary files /dev/null and b/data/api.rda differ
diff --git a/data/crowd.rda b/data/crowd.rda
new file mode 100644
index 0000000..d1be95a
Binary files /dev/null and b/data/crowd.rda differ
diff --git a/data/election.rda b/data/election.rda
new file mode 100644
index 0000000..b07e3e3
Binary files /dev/null and b/data/election.rda differ
diff --git a/data/fpc.rda b/data/fpc.rda
new file mode 100644
index 0000000..b64afb6
Binary files /dev/null and b/data/fpc.rda differ
diff --git a/data/hospital.rda b/data/hospital.rda
new file mode 100644
index 0000000..fbefcdf
Binary files /dev/null and b/data/hospital.rda differ
diff --git a/data/mu284.rda b/data/mu284.rda
new file mode 100644
index 0000000..041af9c
Binary files /dev/null and b/data/mu284.rda differ
diff --git a/data/nhanes.rda b/data/nhanes.rda
new file mode 100644
index 0000000..ca19d8a
Binary files /dev/null and b/data/nhanes.rda differ
diff --git a/data/scd.rda b/data/scd.rda
new file mode 100644
index 0000000..e577463
Binary files /dev/null and b/data/scd.rda differ
diff --git a/data/yrbs.rda b/data/yrbs.rda
new file mode 100644
index 0000000..cc2fb95
Binary files /dev/null and b/data/yrbs.rda differ
diff --git a/inst/BUGS b/inst/BUGS
new file mode 100644
index 0000000..bc2a6da
--- /dev/null
+++ b/inst/BUGS
@@ -0,0 +1,10 @@
+Known bugs:
+
+1. calibrate and postStratify do not recompute the
+ finite population correction. This matters only
+ when the sampling fraction is very close to one
+
+2. svydesign should check that `weights` is the right length
+
+3. options(survey.lonely.psu="adjust") does not do what it says in the
+case of svytotal(). Not yet clear whether it does the right thing, though.
diff --git a/inst/CITATION b/inst/CITATION
new file mode 100644
index 0000000..38b2741
--- /dev/null
+++ b/inst/CITATION
@@ -0,0 +1,21 @@
+citHeader("To cite the survey package in publications use:")
+
+citEntry(entry="misc",
+ author="Thomas Lumley",
+ year=2016,
+ title="survey: analysis of complex survey samples",
+ note="R package version 3.31-5",
+ textVersion="T. Lumley (2016) \"survey: analysis of complex survey samples\". R package version 3.31-5." )
+
+
+
+citEntry(entry="article",
+ year=2004,
+ author="Thomas Lumley",
+ title = "Analysis of Complex Survey Samples",
+ journal="Journal of Statistical Software",
+ volume="9",number=1,pages="1-19",
+ note="R package verson 2.2",
+ textVersion="T. Lumley (2004) Analysis of complex survey samples. Journal of Statistical Software 9(1): 1-19"
+)
+
diff --git a/inst/COPYING b/inst/COPYING
new file mode 100644
index 0000000..293e0df
--- /dev/null
+++ b/inst/COPYING
@@ -0,0 +1,5 @@
+Except as otherwise stated, the code is copyright 2002-2014 Thomas Lumley
+
+svyolr() and many of its methods are closely based on polr() from the
+MASS package, copyright Brian Ripley and Bill Venables.
+
diff --git a/inst/NEWS b/inst/NEWS
new file mode 100755
index 0000000..116b477
--- /dev/null
+++ b/inst/NEWS
@@ -0,0 +1,1144 @@
+3.31-5 predict.svyglm() uses object$xlevels and object$contrasts and so should be able to
+ guess the right factor levels when they aren't supplied in newdata= (for @thosjleeper)
+
+ fix return() without parentheses in svykm.R
+
+3.31-4 svyciprop(,method="like") and confint.svyglm(method="like") work even when the
+ design effect is large.
+
+3.31-3 svyciprop has method="xlogit" that reproduces what SUDAAN and SPSS give. (for Rex Dwyer)
+
+ Added reference for svylogrank
+
+ Added example from YRBS for software comparison
+
+ Copied some names from NEWS into THANKS
+
+3.31-2 explicitly dropping dimensions on a 1x1 matrix
+
+3.31-1 Allow for incompatible change in output of CompQuadForm::farebrother()
+
+3.31 update isIdValid() to dbIsValid() for DBI changes.
+
+ explicitly :: or import ALL THE THINGS.
+
+ mse option for svrepdesign.character and svrepdesign.imputationList was ignored (Anthony Damico)
+
+ confint works on output of svycontrast (for Michael Laviolette)
+
+ denominator df fixed in confint.svyglm (Joey Morris)
+
+ svyboxplot rule for which lower-tail points are outliers was wrong (David Collins)
+
+ calibrate() with variable-specific epsilons and zero sample totals didn't work (Alex Kowarik)
+
+ document that regTermTest(method="LRT") can't handle models with a start= argument
+ and document how to use anova.svyglm instead. (Brad Biggerstaff)
+
+ update tests output for new formatting in current R.
+
+
+3.30-4 svypredmeans() does the same things (together with svycontrast()) as
+ PREDMARG in SUDAAN (for Thomas Yokota and Anthony Damico)
+
+3.30-4 confint.svystat was handling denominator degrees of freedom wrongly
+ for the Wald method (Jared Smith)
+
+3.30-3 vcov.svrepstat does more sensible things when covariances aren't
+ estimated (eg for quantiles). This fixes issues with svyby
+
+ dropped support for old version of hexbin
+
+
+3.30-1 Fix example(svyplot) now that "hexbin" package no longer loads
+ grid package
+
+3.30 svyranktest() now allows k-sample tests (eg Kruskal-Wallis)
+
+ svylogrank() does the generalised G-rho-gamma logrank tests
+ [methods from Rader and Lipsitz (and probably al)]
+
+ various CRAN fixes
+
+3.29-9 AIC.svyglm, based on Rao-Scott approximation
+ BIC.svyglm, based on multivariate Gaussian likelihood for coefficients
+
+ svyglm.svyrep.design checks values are finite before replicating
+
+ calibrate() using a list of margins now allows named vectors for 1-d margins
+
+3.29-8 svyhist(freq=TRUE) works with replicate-weight designs (for Ward Kinkade)
+
+3.29-7 svyranktest() works with replicate-weight designs (for Matthew Soldner)
+
+ reference to the lavaan.survey package in ?svyfactanal
+
+3.29-6 svyby() now always includes within-domain covariances
+
+3.29-5 Change from multicore to parallel.
+ Parallel processing is now only available with R >=2.14
+
+ ddf argument really works now in confint.svyglm (Anthony Damico)
+
+ colour specification in plot.svykmlist now works (Mark Rosenstein)
+
+ svyplot() documentation explains how to annotate a hexbin plot
+
+3.29-4 add symmetric=TRUE to eigenvalue calculation in anova.svyloglin,
+ to improve numerical stability
+
+ subset.svyimputationList now allows the subsets to end up with
+ different observations in them (for Anthony Damico)
+
+ subset.svyDBimputationList now gives an error if the subsets
+ have different observations, not just a warning that people
+ ignored.
+
+ svydesign gives an error if there is only one PSU, to catch
+ omission of tilde in svydesign(id=~1,...) (Milan Bouchet-Valat)
+
+3.29-2 confint.svyglm(method="Wald") wasn't using its ddf= argument,
+ because confint.default() doesn't (Anthony Damico)
+
+3.29 svystandardize() for direct standardization over domains
+
+ withReplicates() has a method for svrepstat objects
+
+ added predict.svrepglm(), which can return replicates
+
+ saddlepoint approximation to sum of chisquares works further out into the tails
+
+ fixed bug in rescaling in calibrate() when initial weights are very wrong (Takahiro Tsuchiya)
+
+ documented df= argument in svyciprop(method="mean") (Anthony Damico)
+
+ added df= argument to other svyciprop methods for Stata compatibility (Anthony Damico)
+
+3.28-3 svykappa didn't work for larger than 2x2 tables. (Jeffery Hughes)
+
+ svyby didn't allow deff="replace" (Francisco Fabuel)
+
+ svrepdesign(,type="other") now warns if scale, rscales arguments are not given
+
+ svystat, svrepstat objects now have a plot method (barplot, currently.)
+
+ svyplot(,type="bubble") now uses the basecol= argument for colors.
+
+ postStratify() now works when some input weights are zero
+
+3.28-2 calibrate() prints out sample and population totals when the lengths disagree
+
+ calibrate() is more stable when the initial weights are wrong by orders of
+ magnitude (for Kirill Mueller)
+
+ calibrate() can now take a list of margins as input, similar to rake()
+ (for various people including Kirill Mueller)
+
+3.28-1 SE now works with output of predict.svyglm (Kieran Healy)
+
+ make.panel.svysmooth() sometimes had invalid bandwidth choices.
+
+ as.svrepdesign() now allows for fpc information not present in
+ the design object (Alistair Gray)
+
+ regTermTest(,method="LRT") works for svyolr(), and
+ method="Wald" now doesn't need user-specified df (for Zachary Grinspan)
+
+ svrepdesign() checks the length of the rscales= argument (Ward Kinkade).
+
+ Document the problem with in-line data-dependent variable construction
+ in svyby() (Anthony Damico)
+
+ Check for completely-missing groups in svyby
+
+3.28 svyvar() for replicate-weight designs now returns whole matrix
+
+ withReplicates() has method for svyvar() output, to simplify multivariate analyses.
+
+ design effect estimate for svytotal with replicate weights was wrong (Daniel Fernandes)
+
+ transform() is now a synonym for update().
+
+ lots of partial argument matching removed to keep CRAN happy.
+
+3.27 added anova.svyglm() for Wald tests and Rao-Scott LRT.
+ anova(model1, model2) works for nested models even if not symbolically nested.
+
+ formula component of svyglm$call is now always named, so update() will work.
+
+ svyboxplot(,all.outliers=TRUE) didn't work for single boxes (Takahiro Tsuchiya)
+
+3.26-1 Better missing-value handling with replicated weights in svyquantile
+
+ svyboxplot() has all.outliers= argument to plot all outliers
+
+3.26 Added Preston's multistage rescaled bootstrap (for Alois Haslinger)
+
+ The multistage bootstrap can use the multicore package if available.
+
+ calibrate() can take a vector of tolerances (for Alois Haslinger)
+ [this actually used to work by accident, but now it's documented]
+
+ Clearer error messages when post-strata contain NAs.
+
+3.25-1 The ... argument to svytable() is now passed to xtabs()
+
+ Clearer documentation about graphing discrete variables.
+
+
+3.25 svyhist() didn't work for two-phase designs.
+
+ added svylogrank() for logrank test for survival data.
+
+ added svyranktest() for two-sample rank tests.
+
+ svrepdesign() and as.svrepdesign() now have mse= argument to request
+ replicate-weight variances centered around the point estimate rather
+ than the mean of the replicates. The default is controlled by
+ options(survey.replicates.mse), which defaults to FALSE, consistent with
+ previous versions. (For Anthony Damico, among others)
+
+3.24-1 CHANGE: svychisq() statistic="lincom" and "saddlepoint" now use the linear combination
+ of F statistics from pFsum().
+
+3.24 Rao-Scott test based on linear combinations of Fs is now also available in regTermTest
+
+ Algorithms from CompQuadForm (AS155 and AS204) now used for method="integration"
+ in pFsum and pchisqsum. These are more accurate and faster than the previous
+ implementations. If you use CRAN binary packages you will need at least R 2.12.
+
+ pFsum() saddlepoint and Satterthwaite methods are also much faster. The
+ saddlepoint approximation now works for the whole range, not just the right tail.
+
+
+3.23-3 Some vignettes didn't load the package (Brian Ripley)
+
+ Added pFsum() for linear combination of F distributions with same denominator.
+
+ better example (quantile regression) in withReplicates().
+
+3.23-2 svyhist() didn't handle include.lowest= correctly. (Chris Wild)
+
+ svyby(, return.replicates=TRUE) now returns the replicates in the same
+ order as the printed output, and labelled. (for Bob Fay)
+
+3.23-1 svycdf() wasn't handling replicate weights correctly.
+
+ Change in svyquantile() for replicate weights when using type="quantile".
+ Point estimate used to be mean of replicates, now is ordinary weighted quantile.
+ (for Bob Fay)
+
+ Small changes in handling of zero weights in svyquantile().
+
+3.23 two-sample svyttest() didn't work with replicate weights. (Richard Valliant)
+
+3.22-4 postStratify now allows 1-d matrix as well as vector in data frame
+ of population counts. (for Jean Opsomer)
+
+ print.summary.pps wasn't being exported (Gonzalo Perez)
+
+ svyhist() ignored right= argument
+
+ predict.svycoxph() was slightly overestimating standard errors for survival curves.
+
+ [.pps and [.twophase2 crashed when no observations were removed (Gonzalo Perez)
+
+3.22-3 bug in trimWeights (Richard Valliant), also add warning for
+ attempts to trim past the mean weight.
+
+3.22-2 bug in the na.rm.by= argument to svyby() (Trevor Thompson)
+
+ regTermTest() now does F tests by default (for Chris Wild)
+
+3.22-1 added df= argument to confint() methods for svystat,
+ svyrepstat, svyby, svyratio (for Richard Valliant)
+
+ added na.rm.by= argument to svyby(), to drop groups defined by
+ missing values of by= variables.
+
+ confint.svyby() uses SE(), not vcov(), so undefined values in replicates
+ are handled on a per-group basis.
+
+ svysmooth(,method="locpoly") now has automatic bandwidth choice, and
+ make.panel.svysmooth() will use this choice by default.
+
+3.22 added stratsample() to take stratified samples.
+
+ fixed bug in design effects for subsets of calibrated or
+ database-based surveys
+
+ changed scaling in biplot.svyprcomp so area is proportional to
+ weight, rather than height proportional to weight.
+
+3.21-3 svyratio() can now estimate design effects (for Scott Kostyshak)
+
+3.21-2 Rao & Wu bootstrap wasn't sampling n-1 PSUs (Richard Valliant)
+
+3.21-1 bug in printing variances for three or more variables (Corey Sparks)
+
+ svyquantile() reliably returns NA for NAs in data when na.rm=FALSE.
+
+ svymle() was not using analytical gradients with nlm() (Christian Raschke)
+
+3.21 added trimWeights() to trim weights, and trim= option to calibrate
+ (for Richard Valliant)
+
+ clearer documentation that svyquantile() needs ci=TRUE or keep.var=FALSE
+ to work with svyby()
+
+ added a simple random sample to data(api) as promised in book (Djalma Pessoa)
+
+3.20 in svycoxph() modify the rescaling of weights to avoid very small weights
+ because of convergence problem in coxph() with counting-process data (for Tapan Mehta)
+
+ added some multivariate statistics:
+ svyprcomp(): principal components, svyfactanal(): factor analysis.
+
+ added heuristic check that combined.weights= has been specified correctly.
+
+ confint.default wouldn't give CIs for multiple parameters with replicate weights, because
+ the vcov matrix didn't have variable names. (Art Burke)
+
+ More of the svyciprop() methods now work for replicate-weight designs.
+
+ The book of the package is now available! (see http://faculty.washington.edu/tlumley/svybook)
+
+
+3.19 svrepdesign() can specify replicate-weight columns with a regular expression
+
+ svrepdesign() can produce database-backed designs
+
+ svyquantile() has a df argument to use a t distribution in
+ Woodruff's method (for Wade Davis)
+
+ calibrate() doesn't require an intercept in the calibration model (for Richard Valliant)
+
+ regTermTest() and model.frame() work with svyolr() (for Michael Donohue)
+
+ better printing of svyvar() output (for Brad Fulton)
+
+ twophase() documents more clearly that method="simple" is preferred for standard epi
+ designs where it works.
+
+ better error messages when a database-backed design has a closed connection
+
+
+3.18-1 documented the need to use quasibinomial/quasipoisson in svyglm
+
+ improved the description of confidence intervals and standard errors for
+ svyquantile.
+
+
+3.18 Changed the default to combined.weights=TRUE in svrepdesign()
+
+ Fixed bug in multiple imputation analysis with multicore package.
+
+ The check for PSUs properly nested in strata had some false negatives.
+
+
+3.17 Under Linux, Mac OS, and most Unix systems, multiple processors can be
+ used for the subgroups in svyby(), the imputed data sets in with.svyimputationList
+ and with.DBsvyimputationList, and the replicate weights in svyglm.svyrep.design
+ and svyolr.svyrep.design. This requires the 'multicore' package and the
+ argument multicore=TRUE to the functions (in the absence of the multicore
+ package, the multicore=TRUE option is just ignored).
+
+ svyvar.svyrep.design handled NA values incorrectly (Arthur Burke)
+
+ print.summary.twophase2 wasn't exported, so summary(twophase.object)
+ gave Too Much Information (Norman Breslow)
+
+ svytotal.svyrep.design labelled the statistic it computed as 'mean',
+ although it really was the correct total. (Arthur Burke)
+
+ detection of PSUs not nested in strata was incorrect in some cases.
+
+ added xbins= option to svyplot for hexbin styles (for Bryan Shepherd)
+
+ print() method now has strata in a more predictable order (for Norman Breslow)
+
+ regTermTest(,method="LRT") now does Rao-Scott-type tests based on the estimated
+ loglikelihood ratio, for generalized linear models and the Cox model. Similarly,
+ confint.svyglm(,method="likelihood") does confidence intervals based on the
+ Rao-Scott-type likelihood ratio test.
+
+ Updated marginpred() to work with survival 2.35-7
+
+ Documentation fixes revealed by the new R pre-2.10 help parser
+
+ Added unwtd.count() to count the raw number of non-missing observations.
+
+ The new PPS designs now work with subset().
+
+
+3.16 PPS designs without replacement, based on the weighted covariance of
+ sample indicators: Horvitz-Thompson and Yates-Grundy estimators,
+ Overton's approximation, Hartley-Rao approximation, a modified
+ Hartley-Rao approximation that depends only on sample data.
+
+3.15-1 The new two-phase designs added in 3.15 are now exported properly.
+
+3.15 Full multistage sampling now possible at both phases of a
+ two-phase design, and the standard errors now exactly match
+ Sarndal et al. The underlying algorithms use sparse matrices
+ to store the weighted covariance of sampling indicators, and so
+ require the Matrix package. Use method="approx" in twophase()
+ to get the old methods, which use less memory.
+
+ added marginpred() for predictive margins, ie, predictions after
+ calibration/direct standardization on confounder distribution.
+
+ standard errors for predict.svyglm(,type="response") were
+ printing incorrectly.
+
+ as.data.frame.svrepstat now works when the result has
+ return.replicates=TRUE
+
+ The separate package odfWeave.survey provides methods for odfWeave::odfTable
+ for some survey objects.
+
+ formula() now works correctly on svykmlist objects with standard errors.
+
+3.14 predict.svycoxph() now does fitted survival curves with standard
+ errors for the Cox model. (for Norman Breslow)
+
+ standard errors for svykm use a bit less memory.
+
+ quantile.svykm can do confidence intervals
+
+ added some references on svykm standard errors.
+
+ tidied up some help pages.
+
+3.13 Add standard errors to svykm() (for Norman Breslow)
+
+ fix typo in svyquantile(interval.type="betaWald") and add
+ 'degrees of freedom' correction to the effective sample size.
+
+ add 'degrees of freedom' correction to effective sample size
+ in svyciprop, type="beta".
+
+ SE, coef for svyratio objects now optionally convert to a vector
+ and confint() now works on ratios.
+
+3.12 Add svyttest() for t-tests, as a wrapper for svyglm
+
+ Add svyciprop() for confidence intervals for proportions,
+ especially near 0 or 1
+
+ confint() works with svycontrast(), svyquantile(),
+ svyciprop() output.
+
+ bug fix for updates to ODBCsvydesign objects.
+
+ Add example of PPS sampling to example(svydesign), and link to
+ help for variance estimation. Add Berger(2004) reference.
+
+ svyby() now has vartype="ci" to report confidence intervals
+ (for Ron Burns)
+
+ update survival examples to work with new version of survival
+ package.
+
+
+3.11-2 Document that calibrate() to PSU totals requires at least as
+ many observations as calibration variables
+
+ pchisqsum(,type="saddlepoint") now works down to mean x 1.05
+ rather than mean x 1.2
+
+ The breaks= argument to svyhist() now works (Stas Kolenikov)
+
+ svyhist() works on database-backed designs.
+
+3.11-1 svyglm() [and svyratio()] gave an error for post-stratified
+ designs with missing data (Shelby Chartkoff)
+
+ svycoxph() gives a clearer error message for negative weights.
+
+ svyquantile() now has a 'betaWald' option, as proposed
+ by Korn & Graubard (1998), and has an option for handling
+ ties that appears similar to (some versions of) SUDAAN
+ (for Melanie Edwards)
+
+ plot.svycdf() has an xlab argument to override the default labels
+
+3.11 as.svrepdesign now has type="subbootweights" for
+ Rao and Wu n-1 bootstrap
+
+ An approximation for PPS without replacement due to Brewer
+ is available in svydesign()
+
+ svydesign() no longer warns if some fpc are exactly zero, but
+ still warns if they are suspiciously large or small
+
+3.10-1 svycoplot can now pass ... arguments to xyplot(), not just to panel.
+
+ svycontrast() has a 'default' method that assumes only a coef()
+ and vcov() method are available.
+
+ Fixed example code for anova.svyloglin.
+
+ Added predict(,type="terms"), termplot(), residuals(,type="partial")
+ for svyglm. As a result, the default for se= in
+ predict.svyglm has changed.
+
+ make.panel.svysmooth() makes a weighted smoother as a slot-in
+ replacement for panel.smooth(), eg in termplot().
+
+ print.summary.svyloglin was broken (Norm Breslow).
+
+ confint() method for svyglm has both Wald-type and
+ likelihood-type confidence intervals (based on Rao-Scott test)
+
+ documented that svykappa() requires factor variables.
+
+ svysmooth() doesn't fail when data are missing.
+
+ documented that update.svyloglin is faster than fitting a new
+ model.
+
+ dotchart() methods for svyby, svystat, svrepstat
+
+ svyloglin() handles missing data better.
+
+ svymle() didn't work if constant parameters were in any
+ position other than last.
+
+ svyby() now has a return.replicates argument (for Phil Smith).
+
+ logit and raking calibration could run into NaN problems with
+ impossible bounds. Step-halving seems to fix the problem.
+
+
+3.10 update() methods for database-backed designs.
+
+ improvements in graphics for subsets of database-backed designs.
+
+ barplot methods for svystat, svrepstat, svyby objects.
+
+ svytable() for database-backed designs
+
+ quantiles work with svyby(covmat=TRUE) for replicate-weight designs.
+
+ fix printing of p-value in svychisq, type="lincom"
+
+ better error messages for misspecified fpc in svydesign()
+
+ database-backed analysis of multiple imputations.
+
+ formatting changes to coef.svyquantile, SE.svyquantile, svyby
+
+ svrepdesign works with multiple imputations (though not with databases)
+
+ fix for missing factor levels in subsets of database-backed designs
+
+ allow svychisq(statistic='lincom') with replicate weights.
+
+ quantile regression smoothers in svysmooth()
+
+ add svychisq.twophase() (for Norm Breslow)
+
+ changed defaults in predict.svyglm so that plot.lm works
+ (for Patricia Berglund)
+
+ svyloglin() for loglinear models, with Wald and Rao-Scott tests.
+
+ pchisqsum() (and svychisq, anova.svylogin) have a saddlepoint approximation.
+
+
+3.9-1 improvements in svyby, degf, svyglm for subsets of calibrated designs or
+ database-backed designs.
+
+ svyboxplot() and svycdf() now work with database-backed designs.
+
+ ODBC support for database-backed designs.
+
+ modified the degrees of freedom calculation in svyglm.
+
+3.9 Added database-backed design objects. The data= argument to svydesign
+ can be the name of a database table in a relational database with a
+ DBI-compatible interface. Only the meta-data is kept in R, other variables
+ are loaded as necessary.
+
+3.8-2 Added svycoplot()
+
+3.8-1 Added subset.svyimputationList
+
+ coef.svyolr returns intercepts as well (by default).
+
+ svyolr() has a method for replicate-weight designs
+
+ print methods for svykm, svykmlist weren't exported.
+
+3.8 svyolr() for proportional odds and related models.
+
+ license is now GPL 2|3 to accommodate code ripped from MASS package
+
+ svykm() for survival curves (no standard errors yet)
+
+3.7 Added style="transparent" to svyplot().
+
+ svyby() and svytable() work on twophase objects.
+
+ svychisq() has statistic="lincom" for linear combination of chisquare,
+ the exact asymptotic distribution.
+
+ Added interface to mitools package for analyzing multiple imputations
+
+ svykappa() for Cohen's kappa (for Tobias Verbeke)
+
+3.6-13 Change in tolerances so that calibrate() works better with collinear
+ calibration variables (Richard Valliant)
+
+ calibrate() can be forced to return an answer even when the specified
+ accuracy was not achieved.
+
+3.6-12 svyhist() handles missing data better.
+
+ Added svycdf() for cumulative distribution function estimate.
+
+3.6-11 postStratify() for repweights was standardizing the replicates to
+ slightly wrong population totals. (Alistair Gray)
+
+ vcov() for two-phase designs gives the contributions from each phase
+ for a wider range of statistics. (Norman Breslow)
+
+ fixes for codetools warnings.
+
+3.6-10 Added error message for missing sampling indicator in two-phase
+ design (Lucia Hindorff)
+
+ Added tests/kalton.R with reweighting examples.
+
+ make.calfun() for creating user-specified calibration distances.
+
+ NOTE: Calling grake() directly now requires a calfun object rather than
+ a string: see help(make.calfun).
+
+3.6-9 Bootstrap weights used last stratum size rather than harmonic mean
+ for n/(n-1) factor (Djalma Pessoa)
+
+ method= argument to svycoxph() didn't work (Lisa McShane)
+
+ svyquantile did not treat missing values as a domain
+ (Nicole Glazer)
+
+ fix for change in pmax/pmin (Brian Ripley)
+
+ Add pchisqsum for distribution of quadratic forms.
+
+3.6-8 A fix in 3.6-6 had broken svycoxph when only a single predictor
+ variable was used (Lisa McShane)
+
+3.6-7 svycoxph() is much faster for replicate weights
+
+ degf.svyrep.design uses a cached value rather than
+ recomputing.
+
+3.6-6 svyquantile was not passing method= argument to approxfun()
+ (Jacques Ferrez)
+
+ Documented that svyquantile(interval.type="score") may not be any
+ more accurate
+
+ Broken link due to typo in svyratio.Rd (Giuseppe Antonaci)
+
+ postStratify could overestimate standard errors for post-strata cutting
+ across existing sampling strata. (Ben French)
+
+ svycoxph() would not run for subsets of calibrated designs.
+ (Norman Breslow)
+
+3.6-5 Add return.replicates option to svyratio() (for ine.pt)
+
+ Add amount= option to svyplot
+
+ Design effects for totals were wrong for PPS
+ sampling. (Takahiro Tsuchiya)
+
+3.6-4 rownames fix for svyratio with a single statistic.
+
+3.6-3 raking by rake() now has slightly more accurate (smaller)
+ standard errors. As a result, it can't be used on pre-2.9
+ svydesign objects.
+
+ calibrate() does not warn about name mismatches when population
+ argument has no names.
+
+ svyCprod, svyrecvar, grake now exported.
+
+3.6-2 covmat=TRUE option for svyratio.
+
+ svycontrast() fix for svyby() with empty groups (ine.pt)
+
+3.6-1 Allow averaged bootstrap weights (as StatCanada sometimes produces)
+ in svrepdesign()
+
+ Fix derivative to get faster convergence in logit calibration
+ (Diego Zardetto)
+
+ svycontrast() can take named vectors of just the non-zero coefficients.
+
+ Nonlinear combinations of statistics with svycontrast()
+
+3.6 Allow empty factor levels in calibration (for Diego Zardetto).
+
+ Workaround for strange S4 class/NAMESPACE issue with hexbin
+ plots; actual fix requires more understanding.
+
+ regTermTest handles MIresult objects.
+
+ Add dimnames, colnames, rownames methods (for ine.pt)
+
+ svysmooth for scatterplot smoothers and density estimation
+ (needs KernSmooth)
+
+ Give a warning when fpc varies within strata.
+
+ svycontrast() for linear combinations of survey statistics
+
+ covmat=TRUE option to svyby() for replicate-weight designs, so
+ the output can be used in svycontrast().
+
+3.5 Add estWeights for Robins et al way of using auxiliary
+ information (ie AIPW).
+
+ Remove JSS article and survey-vanderbilt.pdf from inst/
+ since they are now seriously out of date.
+
+ paley() now gives matrices of order 2^k(p+1), which are
+ usually of minimal or near-minimal size.
+
+ Drop 72x72 and 256x256 Hadamard matrices, which are easy
+ to recreate, from precomputed set and replace 36x36 with the
+ one from Plackett & Burman, which has full orthogonal balance
+
+ Note that changes to svyby now require R 2.2.0 or later.
+
+ predict.svyglm has option to return just variances (rather
+ than entire variance-covariance matrix)
+
+ drop.empty.groups now works when the grouping variables
+ are not factors.
+
+ Add a namespace
+
+ Move precomputed Hadamard matrices from inst/hadamard.rda to
+ R/sysdata.rda
+
+3.4-5 Add svyboxplot (for Luke Peterson)
+
+ Add drop.empty.groups option to svyby
+
+3.4-4 Paley construction of Hadamard matrices now knows primes
+ up to 7919, works for larger sizes if the user supplies
+ a suitable prime.
+
+ calibrate() now reorders elements of 'population' to match
+ column names of model matrix if necessary.
+
+ predict() method for svyglm (for Phil Smith, Andrew Robinson)
+
+ svyratio() for two-phase designs.
+
+ Added vignette on domain estimation.
+
+ svyby() can report multiple vartypes.
+
+3.4-3 make svyratio work with svyby (for Phil Smith)
+
+ increase default number of iterations in calibrate()
+
+3.4-2 Options for residual df for summary.svyglm, default based on degf
+ Default denominator df for svyglm, svycoxph in regTermTest.
+
+ survey.lonely.psu now applies to as.svrepdesign.
+
+ keep up with changes in all.equal() for R 2.3.0
+
+3.4-1 Speed optimizations for JKn weights with self-representing strata
+ - jackknife replicates are not created for these strata
+ - svytotal does not use these strata in variance calculation.
+ - svytotal, svymean, svyratio,svyquantile,svyglm recognize
+ designs (eg subsets) where all strata are self-representing.
+
+ [.repweights_compressed does less copying and is a lot faster
+ for large designs
+
+ Added verbose= option to svyby() to monitor slow computations.
+
+ Added vartype="cv","cvpct" options for svyby().
+
+ Two-phase designs gave incorrect variances in some cases [they
+ were correct if the first stage was infinite superpopulation
+ sampling or if all phase 1 ultimate sampling units were
+ represented in phase 2]. These are fixed but twophase() now
+ limits the first phase to single-stage cluster or element
+ sampling. [detailed bug report from Takahiro Tsuchiya]
+
+ added vignette describing estimator of phase-one variance in
+ two-phase designs
+
+ minor speedup in svyrecvar() for self-representing strata
+
+ added make.formula() for convenience with many variables.
+
+
+3.4 twophase() for specifying two-phase designs.
+
+ two vignettes: a simple example and a description of two-phase epi designs
+
+ svyratio handles missing data.
+
+ cv() gives NaN rather than an error when the statistic is zero (for
+ ana.pmartins at ine.pt)
+
+ oldsvydesign() is officially deprecated
+
+ Jackknife variances for strata with a single population PSU were wrong
+ (non-zero) (ana.pmartins at ine.pt)
+
+ svyglm refused to work on subsets of calibrated designs
+
+3.3-2 Add cv, SE, coef, and deff methods for svyby (for Ana Quiterio)
+
+ as.data.frame methods for svystat, svrepstat
+
+ regTermTest can do F-tests now (Daryl Morris).
+
+ fix documentation of value for as.svrepdesign (Alan Zaslavsky)
+
+3.3-1 Make nest=TRUE in multistage designs work when only some initial
+ sampling stages are stratified
+
+ Multistage recursive variances were only going to two stages.
+
+ Add "(with replacement)" to output of print.survey.design2 when
+ no fpc is specified.
+
+3.3 Added more generalized raking estimators: raking ratio, bounded raking
+ ratio, logit (for Ana Quiterio)
+
+ svytable() could sometimes leave the class attribute off the result.
+ summary() now gives tests of association for svytable().
+
+ svychisq() works for replicate designs
+
+ degf() gives approximate degrees of freedom for replicate designs.
+
+ Clearer error messages when design information is missing.
+
+3.2-1 Fix ordering bug in ftable.svyby (Stefano Calza)
+
+ The "probability" option added to svyquantile for replicate designs
+ in 3.1 computed standard errors for the wrong tail. (Gillian Raab).
+
+3.2 Add option to calibrate() to make weights constant within clusters.
+
+ Add bounded regression calibration to calibrate()
+
+3.1-1 Rescale svyvar output by n/(n-1) to match Kish, which makes a small
+ difference to design effect computations. (for Takahiro Tsuchiya)
+
+ Test for presence of intercept in calibrate() was too fussy.
+
+3.1 Quantiles for replicate-weight designs now by default compute confidence
+ intervals on the probability scale and transform, so they are valid for
+ jackknife designs. (as Gillian Raab suggested long ago)
+
+ Analyses on replicate weights should use eg svymean, which has
+ methods for replicate weight designs; the old (eg svrepmean) variants
+ are now deprecated.
+
+ calibrate() can now use regression models with variance proportional
+      to a linear combination of predictors (and so can duplicate ratio
+ estimators of means and totals)
+
+ Prettier labelling of objects created by postStratify(), calibrate(),
+ update(), subset()
+
+ svytotal on replicate weight designs was computing means, not totals
+ (probably since 3.0).
+
+3.0-1 Allow some strata to have an infinite population (zero sampling fraction)
+ (this doesn't happen in reality but is the recommended analysis for
+ handling certainty PSUs in some large NCHS studies).
+
+ Let svyby() handle vectors that are not in the design object (even
+ though they are discouraged)
+
+ calibrate() was working only under stratified/simple random sampling.
+
+ Allow user-supplied Hadamard matrix for brrweights.
+
+ as.svrepdesign gave a spurious warning when converting post-2.9-1
+ objects without finite population corrections to BRR.
+
+ Allow multicolumn response variable in svymle() (for survival data)
+
+ Add nlm() as the default optimization method for svymle().
+
+3.0 Added simple GREG (G-calibration) estimators with calibrate()
+
+ Added deff="replace" option to compute design effects comparing to
+ simple random sampling with replacement, eg for designs where the weights
+ do not sum to the population size. (for Gillian Raab)
+
+ Added more references for median estimation.
+
+ Added separate ratio estimator of totals for stratified
+ samples. (for Renzo Vettori)
+
+ cv.svyratio was inverted.
+
+ rake() on survey design objects was accumulating cruft in the
+ postStrata component on each iteration.
+
+ Subsetting of raked designs without replicate weights was
+ broken (Steve Roberts)
+
+ Standard errors were wrong for some domain estimates in
+ post-stratified models without replicate weights.
+
+ More extensive tests comparing domain estimates to equivalent
+ ratio and regression formulations.
+
+ Changed default in svyby to keep.var=TRUE
+
+ Prettier stratum labels.
+
+ New homepage at http://faculty.washington.edu/tlumley/survey/
+
+      svyplot(type="hex") works with both pre-1.0 and post-1.0 versions
+ of the hexbin package.
+
+      Fixed svychisq denominator degrees of freedom for stratified designs,
+      correcting a bug introduced by the multistage revision. (Takahiro Tsuchiya)
+
+
+2.9-1 Fixed typo in description of fpc in svydesign.Rd
+
+ Added inst/twostage.pdf with examples of two-stage analyses.
+
+      Handling of fpc specified as a proportion in the absence of weights
+ was wrong.
+
+2.9 Added full multistage sampling, involving a redesign of the survey.design
+ object. The old objects are deprecated; they may be converted with
+ as.svydesign2. Use options(survey.ultimate.cluster=TRUE) to get
+ the same one-stage standard errors as earlier versions and
+ options(survey.want.obsolete=TRUE) to turn off the annoying warnings
+ about old-style survey objects. If you must create old-style survey
+ objects use oldsvydesign().
+
+ As a consequence of the redesign, most of the svyxxx functions
+ are now generic, with methods for both svydesign and svrepdesign
+ objects. Use svymean instead of svrepmean, for example.
+
+ Added more Hadamard matrices, including the Paley construction.
+ brrweights() now finds designs of nearly optimal size for most surveys.
+
+ Faster svymean, svytotal for replicates, with less memory use.
+
+ Added "bootstrap" option for as.svrepdesign
+
+ svyby and ftable.svyby now handle Deff (expanded from a suggestion
+ by Tobias Verbeke)
+
+ svyhist() for probability-weighted histograms
+
+ added svycoxph() for replicate weight designs
+
+ The "lonely.psu" corrections will be applied to strata with a single
+ PSU in a subset (domain) if options("survey.adjust.domain.lonely")
+ is TRUE. The default is FALSE.
+
+ subset.survey.design was not working for post-stratified designs.
+
+ Added a PDF file with examples from UCLA ATS web site, including
+ comparisons with WesVar and SUDAAN. (inst/ucla-examples.pdf)
+
+ Added slides from a talk at Vanderbilt University.
+ (inst/survey-vanderbilt.pdf)
+
+ Fixed Deff to use simple random sampling without replacement.
+
+ Much faster confidence intervals for quantiles based on inverting a
+ Wald test are now default. These are less accurate in small
+ samples; the old method is still available.
+ (based on suggestion from Gillian Raab)
+
+
+2.8-4 Added a whole lot more references to the documentation.
+
+ data(hospital) now has two sets of weights, one matching the
+ UCLA ATS site and one matching the original reference.
+ (from Tobias Verbeke)
+
+      summary.svyrep.design was reporting 1 replicate for compressed
+ weights (but still computing correctly)
+
+2.8-3 postStratify for svydesign objects was giving too large standard errors
+
+ Add deff() to extract design effects.
+
+2.8-2 rewrite cv() to use coef() and SE()
+
+2.8-1 Make Deff estimates work with ftable. (for Gillian Raab)
+
+ ftable.svyby didn't work with a single by() variable (for Gillian Raab)
+
+ Missing values now allowed in svychisq(). (for Lee Sieswerda)
+
+2.8 fix printing of svyby broken in 2.7
+
+ add ftable.svyby
+
+ postStratify for svydesign surveys.
+
+2.7-1 as.svrepdesign was giving the wrong weights for type="Fay" in 2.7
+
+2.7 Option compress=TRUE in as.svrepdesign to reduce size
+ of replicate weight matrix (and in rake(), postStratify()). Also
+ function compressWeights() to do this to arbitrary replicate designs.
+
+ terms() reorders variables in interactions, which confused regTermTest
+ (Daniel Almirall)
+
+ Added extractor function SE() for standard errors (Andrew Robinson)
+
+ hadamard() now finds smaller Hadamard matrices.
+
+ svyCprod warns if a subset has only one PSU in some stratum
+ (Gillian Raab)
+
+ Added tests/lonely.psu.R
+
+ Added another option "average" for lonely.psu (Gillian Raab)
+
+ svydesign can now detect from sampling weights or fpc when a stratum
+ with a single PSU is self-representing, and in these cases
+ options("survey.lonely.psu") is not used.
+
+ ftable.svystat and ftable.svrepstat to produce better tables of
+ percentages and totals.
+
+ Experimental set of functions to help in computing non-response weights
+ (see ?nonresponse for details)
+
+2.6-2 Better handling of NAs in svyby
+
+ Subsetting didn't work right for single-observation subsets.
+
+ svyglm and svycoxph had scoping problems when run inside a
+ function (Daniel Almirall)
+
+ svyglm and svycoxph now accept weights (to be multiplied by
+ the sampling weights)
+
+ With R 2.0.0 less copying will occur, especially when variables=
+ is not specified in a design
+
+2.6-1 Totals for factors give cell totals.
+
+2.6 Design effects were broken for multiple means computed at once.
+
+ Add coefficient of variation for mean, total, ratio,...
+
+ variables= argument of svydesign works with missing data (Tobias Verbeke)
+
+ Fix reference to Binder (1991) (Tobias Verbeke)
+
+ Means for factors now give cell means.
+
+ coef and vcov methods for svystat and svrepstat.
+
+ Another tiny example dataset from the VPLX manual
+
+ svrepvar was incorrect for multiple variables simultaneously
+
+ Better error messages for missing data in svrVar.
+
+2.5 Wald tests for association in contingency tables.
+
+ svyplot() for weighted graphics (some of these require "hexbin")
+
+ Examples for rake(), postStratify()
+
+ svyby() works for svrepdesign analyses as well
+ added subset.svyrep.design()
+
+ svrepvar() added
+
+ Design effects for means and totals. (Gillian Raab)
+
+2.4 Make regTermTest work with svycoxph()
+
+ Clearer output for print.svycoxph() (Daniella Gollinelli)
+
+ Rao-Scott adjusted tests for contingency tables.
+
+ svyby() for tables of means, medians, etc
+
+2.3-2 Fix for svyquantile confidence intervals.
+
+2.3-1 clearer warnings in svrVar when some replicates give NA.
+ (for Gillian Raab)
+
+2.3 svyquantile has confidence intervals, added svrepquantile.
+
+2.2-1 as.svrepdesign didn't pass options to brrweights (for Fred Rohde)
+
+2.2 published in Journal of Statistical Software
+ - If population size is specified, but not weights or probabilities,
+ work out the probabilities from the population size
+ - Clearer error message when some design information is NA
+ (for Tobias Verbeke)
+ - better update() methods
+
+2.0 Just a numbering change.
+
+1.9-3 Fix svytotal variance estimate
+ as.svrepdesign wasn't handling unstratified cluster samples right.
+ Check for fpc in multistage samples, which we don't handle.
+ add print method for basic survey statistics
+ add rake()
+ California API data.
+
+1.9-2 Added post-stratification of replicate-weights
+
+1.9-1 Bugfix: jknweights was requiring finite population correction.
+
+1.9 - "certainty" option for single-PSU strata
+ - Replication weight analyses (alpha version)
+
+1.4 - I think all the possible permutations of arguments
+ in svydesign now work.
+ - The examples in svyglm incorrectly had a data= argument.
+
+1.3 svydesign wasn't allowing weights to be a vector.
+
+1.2 - svydesign(nest=TRUE) now uses less memory
+ - added regTermTest for testing regression terms.
+
+1.1 Added subset, update methods. Variance estimation is now correct for
+ subpopulations produced with select or subscripting.
+
+1.0 No changes
+
+0.9-5 - finite population correction should be done with PSUs not
+ individuals
+ - added Cox models
+
+0.9-4 - svyCprod was computing n/(n-1) using number of observations,
+ not number of PSUs, and was averaging observations rather than
+ PSU means to compute stratum means.
+ - Bug in handling multiple levels of cluster id in svydesign
+
+0.9-3: Finite population correction.
+ Adjustments for stratum with single PSU (Fred Rohde)
+ Fixed svydesign(nest=TRUE) to work with strata
+
+0.9-1: First release.
+
diff --git a/inst/api.db b/inst/api.db
new file mode 100644
index 0000000..55b6c41
Binary files /dev/null and b/inst/api.db differ
diff --git a/inst/disclaimer b/inst/disclaimer
new file mode 100644
index 0000000..67b001f
--- /dev/null
+++ b/inst/disclaimer
@@ -0,0 +1,8 @@
+This software comes with NO WARRANTY WHATSOEVER. This product has not
+been evaluated by the Food and Drug Administration and is not intended
+to diagnose, treat, cure, or prevent any disease. If it breaks you get
+to keep both pieces. Not tested on animals. Your mileage may vary.
+Keep out of reach of babies and small children. For external use
+only. Contents may have settled during shipping. Times are approximate.
+Batteries not included. Product of more than one country.
+May contain nuts.
diff --git a/inst/doc/domain.R b/inst/doc/domain.R
new file mode 100644
index 0000000..9f8bf2e
--- /dev/null
+++ b/inst/doc/domain.R
@@ -0,0 +1,67 @@
+### R code from vignette source 'domain.Rnw'
+
+###################################################
+### code chunk number 1: domain.Rnw:29-34
+###################################################
+library(survey)
+data(fpc)
+dfpc<-svydesign(id=~psuid,strat=~stratid,weight=~weight,data=fpc,nest=TRUE)
+dsub<-subset(dfpc,x>4)
+svymean(~x,design=dsub)
+
+
+###################################################
+### code chunk number 2: domain.Rnw:41-42
+###################################################
+svyby(~x,~I(x>4),design=dfpc, svymean)
+
+
+###################################################
+### code chunk number 3: domain.Rnw:49-50
+###################################################
+summary(svyglm(x~I(x>4)+0,design=dfpc))
+
+
+###################################################
+### code chunk number 4: domain.Rnw:57-58
+###################################################
+svyratio(~I(x*(x>4)),~as.numeric(x>4), dfpc)
+
+
+###################################################
+### code chunk number 5: domain.Rnw:76-84
+###################################################
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
+gclus1 <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069))
+
+svymean(~api00, subset(gclus1, comp.imp=="Yes"))
+svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), gclus1)
+summary(svyglm(api00~comp.imp-1, gclus1))
+
+
+###################################################
+### code chunk number 6: domain.Rnw:88-94
+###################################################
+data(mu284)
+dmu284<-svydesign(id=~id1+id2,fpc=~n1+n2, data=mu284)
+
+svymean(~y1, subset(dmu284,y1>40))
+svyratio(~I(y1*(y1>40)),~as.numeric(y1>40),dmu284)
+summary(svyglm(y1~I(y1>40)+0,dmu284))
+
+
+###################################################
+### code chunk number 7: domain.Rnw:100-108
+###################################################
+library("survival")
+data(nwtco)
+nwtco$incc2<-as.logical(with(nwtco, ifelse(rel | instit==2,1,rbinom(nrow(nwtco),1,.1))))
+dccs8<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~interaction(rel,stage,instit)),
+ data=nwtco, subset=~incc2)
+svymean(~rel, subset(dccs8,age>36))
+svyratio(~I(rel*as.numeric(age>36)), ~as.numeric(age>36), dccs8)
+summary(svyglm(rel~I(age>36)+0, dccs8))
+
+
diff --git a/inst/doc/domain.Rnw b/inst/doc/domain.Rnw
new file mode 100644
index 0000000..46f3703
--- /dev/null
+++ b/inst/doc/domain.Rnw
@@ -0,0 +1,111 @@
+\documentclass{article}
+\usepackage{url}
+%\VignetteIndexEntry{Estimates in subpopulations}
+\usepackage{Sweave}
+\author{Thomas Lumley}
+\title{Estimates in subpopulations.}
+
+\begin{document}
+\maketitle
+
+Estimating a mean or total in a subpopulation (domain) from a survey, eg the
+mean blood pressure in women, is not done simply by taking the subset
+of data in that subpopulation and pretending it is a new survey. This
+approach would give correct point estimates but incorrect standard
+errors.
+
+The standard way to derive domain means is as ratio estimators. I
+think it is easier to derive them as regression coefficients. These
+derivations are not important for R users, since subset operations on
+survey design objects automatically do the necessary adjustments, but
+they may be of interest. The various ways of constructing domain mean
+estimators are useful in quality control for the survey package, and
+some of the examples here are taken from
+\texttt{survey/tests/domain.R}.
+
+
+Suppose that in the artificial \texttt{fpc} data set we want to
+estimate the mean of \texttt{x} when \texttt{x>4}.
+<<>>=
+library(survey)
+data(fpc)
+dfpc<-svydesign(id=~psuid,strat=~stratid,weight=~weight,data=fpc,nest=TRUE)
+dsub<-subset(dfpc,x>4)
+svymean(~x,design=dsub)
+@
+
+The \texttt{subset} function constructs a survey design object with
+information about this subpopulation and \texttt{svymean} computes the
+mean. The same operation can be done for a set of subpopulations with
+\texttt{svyby}.
+<<>>=
+svyby(~x,~I(x>4),design=dfpc, svymean)
+@
+
+In a regression model with a binary covariate $Z$ and no intercept,
+there are two coefficients that estimate the mean of the outcome
+variable in the subpopulations with $Z=0$ and $Z=1$, so we can
+construct the domain mean estimator by regression.
+<<>>=
+summary(svyglm(x~I(x>4)+0,design=dfpc))
+@
+
+Finally, the classical derivation of the domain mean estimator is as a
+ratio where the numerator is $X$ for observations in the domain and 0
+otherwise, and the denominator is 1 for observations in the domain and
+0 otherwise.
+<<>>=
+svyratio(~I(x*(x>4)),~as.numeric(x>4), dfpc)
+@
+
+The estimator is implemented by setting the sampling weight to zero
+for observations not in the domain. For most survey design objects
+this allows a reduction in memory use, since only the number of zero
+weights in each sampling unit needs to be kept. For more complicated
+survey designs, such as post-stratified designs, all the data are kept
+and there is no reduction in memory use.
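+
+As a quick check, the point estimate from the subset design matches the
+ratio formulation directly, using the objects defined above:
+<<>>=
+## the subset design and the zero-weight ratio give the same domain mean
+coef(svymean(~x, dsub))
+coef(svyratio(~I(x*(x>4)), ~as.numeric(x>4), dfpc))
+@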
+
+
+\subsection*{More complex examples}
+Verifying that \texttt{svymean} agrees with the ratio and regression
+derivations is particularly useful for more complicated designs where
+published examples are less readily available.
+
+This example shows calibration (GREG) estimators of domain means for
+the California Academic Performance Index (API).
+<<>>=
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
+gclus1 <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069))
+
+svymean(~api00, subset(gclus1, comp.imp=="Yes"))
+svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), gclus1)
+summary(svyglm(api00~comp.imp-1, gclus1))
+@
+
+Two-stage samples with full finite-population corrections
+<<>>=
+data(mu284)
+dmu284<-svydesign(id=~id1+id2,fpc=~n1+n2, data=mu284)
+
+svymean(~y1, subset(dmu284,y1>40))
+svyratio(~I(y1*(y1>40)),~as.numeric(y1>40),dmu284)
+summary(svyglm(y1~I(y1>40)+0,dmu284))
+@
+
+Stratified two-phase sampling of children with Wilms tumor,
+estimating relapse probability for those older than 3 years (36
+months) at diagnosis
+<<>>=
+library("survival")
+data(nwtco)
+nwtco$incc2<-as.logical(with(nwtco, ifelse(rel | instit==2,1,rbinom(nrow(nwtco),1,.1))))
+dccs8<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~interaction(rel,stage,instit)),
+ data=nwtco, subset=~incc2)
+svymean(~rel, subset(dccs8,age>36))
+svyratio(~I(rel*as.numeric(age>36)), ~as.numeric(age>36), dccs8)
+summary(svyglm(rel~I(age>36)+0, dccs8))
+@
+
+\end{document}
diff --git a/inst/doc/domain.pdf b/inst/doc/domain.pdf
new file mode 100644
index 0000000..5f9c1e9
Binary files /dev/null and b/inst/doc/domain.pdf differ
diff --git a/inst/doc/epi.R b/inst/doc/epi.R
new file mode 100644
index 0000000..81918e8
--- /dev/null
+++ b/inst/doc/epi.R
@@ -0,0 +1,135 @@
+### R code from vignette source 'epi.Rnw'
+
+###################################################
+### code chunk number 1: epi.Rnw:45-61
+###################################################
+library(survey)
+load(system.file("doc","nwts.rda",package="survey"))
+nwtsnb<-nwts
+nwtsnb$case<-nwts$case-nwtsb$case
+nwtsnb$control<-nwts$control-nwtsb$control
+
+a<-rbind(nwtsb,nwtsnb)
+a$in.ccs<-rep(c(TRUE,FALSE),each=16)
+
+b<-rbind(a,a)
+b$rel<-rep(c(1,0),each=32)
+b$n<-ifelse(b$rel,b$case,b$control)
+index<-rep(1:64,b$n)
+
+nwt.exp<-b[index,c(1:3,6,7)]
+nwt.exp$id<-1:4088
+
+
+###################################################
+### code chunk number 2: epi.Rnw:65-66
+###################################################
+glm(rel~factor(stage)*factor(histol), family=binomial, data=nwt.exp)
+
+
+###################################################
+### code chunk number 3: epi.Rnw:75-79
+###################################################
+dccs2<-twophase(id=list(~id,~id),subset=~in.ccs,
+ strata=list(NULL,~interaction(instit,rel)),data=nwt.exp)
+
+summary(svyglm(rel~factor(stage)*factor(histol),family=binomial,design=dccs2))
+
+
+###################################################
+### code chunk number 4: epi.Rnw:88-94
+###################################################
+dccs8<-twophase(id=list(~id,~id),subset=~in.ccs,
+ strata=list(NULL,~interaction(instit,stage,rel)),data=nwt.exp)
+gccs8<-calibrate(dccs2,phase=2,formula=~interaction(instit,stage,rel))
+
+summary(svyglm(rel~factor(stage)*factor(histol),family=binomial,design=dccs8))
+summary(svyglm(rel~factor(stage)*factor(histol),family=binomial,design=gccs8))
+
+
+###################################################
+### code chunk number 5: epi.Rnw:122-126
+###################################################
+library(survey)
+library(survival)
+data(nwtco)
+nwtco<-subset(nwtco, !is.na(edrel))
+
+
+###################################################
+### code chunk number 6: epi.Rnw:130-131
+###################################################
+coxph(Surv(edrel, rel)~factor(stage)+factor(histol)+I(age/12),data=nwtco)
+
+
+###################################################
+### code chunk number 7: epi.Rnw:143-155
+###################################################
+(dcch<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
+ subset=~I(in.subcohort | rel), data=nwtco))
+svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=dcch)
+
+subcoh <- nwtco$in.subcohort
+selccoh <- with(nwtco, rel==1|subcoh==1)
+ccoh.data <- nwtco[selccoh,]
+ccoh.data$subcohort <- subcoh[selccoh]
+cch(Surv(edrel, rel) ~ factor(stage) + factor(histol) + I(age/12),
+ data =ccoh.data, subcoh = ~subcohort, id=~seqno,
+ cohort.size=4028, method="LinYing")
+
+
+###################################################
+### code chunk number 8: epi.Rnw:165-176
+###################################################
+nwtco$eventrec<-rep(0,nrow(nwtco))
+nwtco.extra<-subset(nwtco, rel==1)
+nwtco.extra$eventrec<-1
+nwtco.expd<-rbind(subset(nwtco,in.subcohort==1),nwtco.extra)
+nwtco.expd$stop<-with(nwtco.expd,
+ ifelse(rel & !eventrec, edrel-0.001,edrel))
+nwtco.expd$start<-with(nwtco.expd,
+ ifelse(rel & eventrec, edrel-0.001, 0))
+nwtco.expd$event<-with(nwtco.expd,
+ ifelse(rel & eventrec, 1, 0))
+nwtco.expd$pwts<-ifelse(nwtco.expd$event, 1, 1/with(nwtco,mean(in.subcohort | rel)))
+
+
+###################################################
+### code chunk number 9: epi.Rnw:185-189
+###################################################
+(dBarlow<-svydesign(id=~seqno+eventrec, strata=~in.subcohort+rel,
+ data=nwtco.expd, weight=~pwts))
+svycoxph(Surv(start,stop,event)~factor(stage)+factor(histol)+I(age/12),
+ design=dBarlow)
+
+
+###################################################
+### code chunk number 10: epi.Rnw:194-197
+###################################################
+(dWacholder <- as.svrepdesign(dBarlow,type="bootstrap",replicates=500))
+svycoxph(Surv(start,stop,event)~factor(stage)+factor(histol)+I(age/12),
+ design=dWacholder)
+
+
+###################################################
+### code chunk number 11: epi.Rnw:209-217
+###################################################
+load(system.file("doc","nwtco-subcohort.rda",package="survey"))
+nwtco$subcohort<-subcohort
+
+d_BorganII <- twophase(id=list(~seqno,~seqno),
+ strata=list(NULL,~interaction(instit,rel)),
+ data=nwtco, subset=~I(rel |subcohort))
+(b2<-svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=d_BorganII))
+
+
+###################################################
+### code chunk number 12: epi.Rnw:222-225
+###################################################
+d_BorganIIps <- calibrate(d_BorganII, phase=2, formula=~age+interaction(instit,rel,stage))
+svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=d_BorganIIps)
+
+
diff --git a/inst/doc/epi.Rnw b/inst/doc/epi.Rnw
new file mode 100644
index 0000000..ee2d7de
--- /dev/null
+++ b/inst/doc/epi.Rnw
@@ -0,0 +1,250 @@
+\documentclass{article}
+\usepackage{url}
+\addtolength{\topmargin}{-0.5in}
+\addtolength{\textheight}{0.75in}
+\addtolength{\oddsidemargin}{-0.5in}
+\addtolength{\textwidth}{1in}
+%\VignetteIndexEntry{Two-phase designs in epidemiology}
+\usepackage{Sweave}
+\author{Thomas Lumley}
+\title{Two-phase designs in epidemiology}
+
+\begin{document}
+\maketitle
+This document explains how to analyse case--cohort and two-phase
+case--control studies with the ``survey'' package, using examples from
+\url{http://faculty.washington.edu/norm/software.html}. Some of the
+examples were published by Breslow \& Chatterjee (1999).
+
+The data are relapse rates from the National Wilms Tumor
+Study (NWTS). Wilms tumour is a rare cancer of the kidney in
+children. Intensive treatment cures the majority of cases, but
+prognosis is poor when the disease is advanced at diagnosis and for
+some histological subtypes. The histological characterisation of the
+tumour is difficult, and histological group as determined by the NWTS
+central pathologist predicts much better than determinations by local
+institution pathologists. In fact, local institution histology can be
+regarded statistically as a pure surrogate for the central lab
+histology.
+
+In these examples we will pretend that the (binary) local institution
+histology determination (\texttt{instit}) is available for all
+children in the study and that the central lab histology
+(\texttt{histol}) is obtained for a probability sample of specimens in
+a two-phase design. We treat the initial sampling of the study as
+simple random sampling from an infinite superpopulation. We also have
+data on disease stage, a four-level variable; on relapse; and on time
+to relapse.
+
+\section*{Case--control designs}
+
+Breslow \& Chatterjee (1999) use the NWTS data to illustrate two-phase
+case--control designs. The data are available at
+\url{http://faculty.washington.edu/norm/software.html} in compressed
+form; we first expand to one record per patient.
+<<>>=
+library(survey)
+load(system.file("doc","nwts.rda",package="survey"))
+nwtsnb<-nwts
+nwtsnb$case<-nwts$case-nwtsb$case
+nwtsnb$control<-nwts$control-nwtsb$control
+
+a<-rbind(nwtsb,nwtsnb)
+a$in.ccs<-rep(c(TRUE,FALSE),each=16)
+
+b<-rbind(a,a)
+b$rel<-rep(c(1,0),each=32)
+b$n<-ifelse(b$rel,b$case,b$control)
+index<-rep(1:64,b$n)
+
+nwt.exp<-b[index,c(1:3,6,7)]
+nwt.exp$id<-1:4088
+@
+
+As we actually do know \texttt{histol} for all patients, we can fit the logistic regression model with full sampling to compare with the two-phase analyses
+<<>>=
+glm(rel~factor(stage)*factor(histol), family=binomial, data=nwt.exp)
+@
+
+ The second phase sample consists of all patients with unfavorable
+ histology as determined by local institution pathologists, all cases,
+ and a 20\% sample of the remainder. Phase two is thus a stratified
+ random sample without replacement, with strata defined by the
+ interaction of \texttt{instit} and \texttt{rel}.
+
+<<>>=
+dccs2<-twophase(id=list(~id,~id),subset=~in.ccs,
+ strata=list(NULL,~interaction(instit,rel)),data=nwt.exp)
+
+summary(svyglm(rel~factor(stage)*factor(histol),family=binomial,design=dccs2))
+@
+
+Disease stage at the time of surgery is also recorded. It could be
+used to further stratify the sampling, or, as in this example, to
+post-stratify. We can analyze the data either pretending that the
+sampling was stratified or using \texttt{calibrate} to post-stratify
+the design.
+
+<<>>=
+dccs8<-twophase(id=list(~id,~id),subset=~in.ccs,
+ strata=list(NULL,~interaction(instit,stage,rel)),data=nwt.exp)
+gccs8<-calibrate(dccs2,phase=2,formula=~interaction(instit,stage,rel))
+
+summary(svyglm(rel~factor(stage)*factor(histol),family=binomial,design=dccs8))
+summary(svyglm(rel~factor(stage)*factor(histol),family=binomial,design=gccs8))
+@
+
+
+\section*{Case--cohort designs}
+In the case--cohort design for survival analysis, a $P$\% sample of a cohort
+is taken at recruitment for the second phase, and all participants who
+experience the event (cases) are later added to the phase-two sample.
+
+Viewing the sampling design as progressing through time in this way,
+as originally proposed, gives a double sampling design at phase two.
+It is simpler to view the process \emph{sub specie aeternitatis}, and
+to note that cases are sampled with probability 1, and controls with
+probability $P/100$. The subcohort will often be determined
+retrospectively rather than at recruitment, giving stratified random
+sampling without replacement, stratified on case status. If the
+subcohort is determined prospectively we can use the same analysis,
+post-stratifying rather than stratifying.
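+
+A minimal sketch of the weights this sampling scheme implies, with a
+hypothetical $P=10$ (a 10\% subcohort) and toy case indicators:
+<<>>=
+## cases are sampled with probability 1, controls with P/100,
+## so the Horvitz-Thompson weights are 1 and 100/P respectively
+P <- 10
+is.case <- c(TRUE, FALSE, FALSE, TRUE)   # toy case indicators
+ifelse(is.case, 1, 100/P)
+@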
+
+There have been many analyses proposed for the case--cohort design
+(Therneau \& Li, 1999). We consider only those that can be expressed as a
+Horvitz--Thompson estimator for the Cox model.
+
+
+
+First we load the data and the necessary packages. The version of the
+NWTS data that includes survival times is not identical to the data
+set used for case--control analyses above.
+<<>>=
+library(survey)
+library(survival)
+data(nwtco)
+nwtco<-subset(nwtco, !is.na(edrel))
+@
+
+Again, we fit a model that uses \texttt{histol} for all patients, to compare with the two-phase design
+<<>>=
+coxph(Surv(edrel, rel)~factor(stage)+factor(histol)+I(age/12),data=nwtco)
+@
+
+We define a two-phase survey design using simple random
+superpopulation sampling for the first phase, and sampling without
+replacement stratified on \texttt{rel} for the second phase. The
+\texttt{subset} argument specifies that observations are in the phase-two sample if they are in the subcohort or are cases. As before, the data structure is rectangular, but variables measured at phase two may be \texttt{NA} for participants not included at phase two.
+
+We compare the result to that given by \texttt{survival::cch} for Lin
+\& Ying's (1993) approach to the case--cohort design.
+
+
+<<>>=
+(dcch<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
+ subset=~I(in.subcohort | rel), data=nwtco))
+svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=dcch)
+
+subcoh <- nwtco$in.subcohort
+selccoh <- with(nwtco, rel==1|subcoh==1)
+ccoh.data <- nwtco[selccoh,]
+ccoh.data$subcohort <- subcoh[selccoh]
+cch(Surv(edrel, rel) ~ factor(stage) + factor(histol) + I(age/12),
+ data =ccoh.data, subcoh = ~subcohort, id=~seqno,
+ cohort.size=4028, method="LinYing")
+@
+
+
+Barlow (1994) proposes an analysis that ignores the finite population
+correction at the second phase. This simplifies the standard error
+estimation, as the design can be expressed as one-phase stratified
+superpopulation sampling. The standard errors will be somewhat
+conservative. More data preparation is needed for this analysis as the
+weights change over time.
+<<>>=
+nwtco$eventrec<-rep(0,nrow(nwtco))
+nwtco.extra<-subset(nwtco, rel==1)
+nwtco.extra$eventrec<-1
+nwtco.expd<-rbind(subset(nwtco,in.subcohort==1),nwtco.extra)
+nwtco.expd$stop<-with(nwtco.expd,
+ ifelse(rel & !eventrec, edrel-0.001,edrel))
+nwtco.expd$start<-with(nwtco.expd,
+ ifelse(rel & eventrec, edrel-0.001, 0))
+nwtco.expd$event<-with(nwtco.expd,
+ ifelse(rel & eventrec, 1, 0))
+nwtco.expd$pwts<-ifelse(nwtco.expd$event, 1, 1/with(nwtco,mean(in.subcohort | rel)))
+@
+
+The analysis corresponds to a cluster-sampled design in which
+individuals are sampled stratified by subcohort membership and then
+time periods are sampled stratified by event status. Having
+individual as the primary sampling unit is necessary for correct
+standard error calculation.
+
+<<>>=
+(dBarlow<-svydesign(id=~seqno+eventrec, strata=~in.subcohort+rel,
+ data=nwtco.expd, weight=~pwts))
+svycoxph(Surv(start,stop,event)~factor(stage)+factor(histol)+I(age/12),
+ design=dBarlow)
+@
+
+In fact, as the finite population correction is not being used, the second stage of the cluster sampling could be ignored. We can also produce the stratified bootstrap standard errors of Wacholder et al (1989), using a replicate-weights analysis
+
+<<>>=
+(dWacholder <- as.svrepdesign(dBarlow,type="bootstrap",replicates=500))
+svycoxph(Surv(start,stop,event)~factor(stage)+factor(histol)+I(age/12),
+ design=dWacholder)
+@
+
+
+\subsection*{Exposure-stratified designs}
+
+
+Borgan et al (2000) propose designs stratified or post-stratified on
+phase-one variables. The examples at
+\url{http://faculty.washington.edu/norm/software.html} use a different
+subcohort sample for this stratified design, so we load the new
+\texttt{subcohort} variable
+<<>>=
+load(system.file("doc","nwtco-subcohort.rda",package="survey"))
+nwtco$subcohort<-subcohort
+
+d_BorganII <- twophase(id=list(~seqno,~seqno),
+ strata=list(NULL,~interaction(instit,rel)),
+ data=nwtco, subset=~I(rel |subcohort))
+(b2<-svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=d_BorganII))
+@
+
+
+We can further post-stratify the design on disease stage and age with \texttt{calibrate}
+<<>>=
+d_BorganIIps <- calibrate(d_BorganII, phase=2, formula=~age+interaction(instit,rel,stage))
+svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=d_BorganIIps)
+@
+
+
+\section*{References}
+
+Barlow WE (1994). Robust variance estimation for the case-cohort
+design. \emph{Biometrics} 50: 1064-1072.
+
+Borgan \O, Langholz B, Samuelsen SO, Goldstein L, and Pogoda J (2000).
+Exposure stratified case-cohort designs. \emph{Lifetime Data Analysis}
+6: 39-58.
+
+Breslow NE and Chatterjee N (1999). Design and analysis of two-phase
+studies with binary outcome applied to Wilms tumour prognosis.
+\emph{Applied Statistics} 48: 457-468.
+
+Lin DY and Ying Z (1993). Cox regression with incomplete covariate
+measurements. \emph{Journal of the American Statistical Association}
+88: 1341-1349.
+
+Therneau TM and Li H (1999). Computing the Cox model for case-cohort
+designs. \emph{Lifetime Data Analysis} 5: 99-112.
+
+Wacholder S, Gail MH, Pee D, and Brookmeyer R (1989). Alternate
+variance and efficiency calculations for the case-cohort design.
+\emph{Biometrika} 76: 117-123.
+\end{document}
diff --git a/inst/doc/epi.pdf b/inst/doc/epi.pdf
new file mode 100644
index 0000000..497a7f1
Binary files /dev/null and b/inst/doc/epi.pdf differ
diff --git a/inst/doc/nwtco-subcohort.rda b/inst/doc/nwtco-subcohort.rda
new file mode 100644
index 0000000..3be00e0
Binary files /dev/null and b/inst/doc/nwtco-subcohort.rda differ
diff --git a/inst/doc/nwts.rda b/inst/doc/nwts.rda
new file mode 100644
index 0000000..5c04841
Binary files /dev/null and b/inst/doc/nwts.rda differ
diff --git a/inst/doc/phase1.R b/inst/doc/phase1.R
new file mode 100644
index 0000000..2b564a6
--- /dev/null
+++ b/inst/doc/phase1.R
@@ -0,0 +1,58 @@
+### R code from vignette source 'phase1.Rnw'
+
+###################################################
+### code chunk number 1: phase1.Rnw:82-105
+###################################################
+rei<-read.table(textConnection(
+" id N n.a h n.ah n.h sub y
+1 1 300 20 1 12 5 TRUE 1
+2 2 300 20 1 12 5 TRUE 2
+3 3 300 20 1 12 5 TRUE 3
+4 4 300 20 1 12 5 TRUE 4
+5 5 300 20 1 12 5 TRUE 5
+6 6 300 20 1 12 5 FALSE NA
+7 7 300 20 1 12 5 FALSE NA
+8 8 300 20 1 12 5 FALSE NA
+9 9 300 20 1 12 5 FALSE NA
+10 10 300 20 1 12 5 FALSE NA
+11 11 300 20 1 12 5 FALSE NA
+12 12 300 20 1 12 5 FALSE NA
+13 13 300 20 2 8 3 TRUE 6
+14 14 300 20 2 8 3 TRUE 7
+15 15 300 20 2 8 3 TRUE 8
+16 16 300 20 2 8 3 FALSE NA
+17 17 300 20 2 8 3 FALSE NA
+18 18 300 20 2 8 3 FALSE NA
+19 19 300 20 2 8 3 FALSE NA
+20 20 300 20 2 8 3 FALSE NA
+"), header=TRUE)
+
+
+###################################################
+### code chunk number 2: phase1.Rnw:109-113
+###################################################
+library(survey)
+des.rei <- twophase(id=list(~id,~id), strata=list(NULL,~h),
+ fpc=list(~N,NULL), subset=~sub, data=rei)
+tot<- svytotal(~y, des.rei)
+
+
+###################################################
+### code chunk number 3: phase1.Rnw:117-124
+###################################################
+rei$w.ah <- rei$n.ah / rei$n.a
+a.rei <- aggregate(rei, by=list(rei$h), mean, na.rm=TRUE)
+a.rei$S.ysh <- tapply(rei$y, rei$h, var, na.rm=TRUE)
+a.rei$y.u <- sum(a.rei$w.ah * a.rei$y)
+a.rei$f<-with(a.rei, n.a/N)
+a.rei$delta.h<-with(a.rei, (1/n.h)*(n.a-n.ah)/(n.a-1))
+Vphase1<-with(a.rei, sum(N*N*((1-f)/n.a)*( w.ah*(1-delta.h)*S.ysh+ ((n.a)/(n.a-1))*w.ah*(y-y.u)^2)))
+
+
+###################################################
+### code chunk number 4: phase1.Rnw:128-130
+###################################################
+Vphase1
+attr(vcov(tot),"phases")$phase1
+
+
diff --git a/inst/doc/phase1.Rnw b/inst/doc/phase1.Rnw
new file mode 100644
index 0000000..7e20f38
--- /dev/null
+++ b/inst/doc/phase1.Rnw
@@ -0,0 +1,134 @@
+\documentclass{article}
+\usepackage{url}
+\addtolength{\topmargin}{-0.5in}
+\addtolength{\textheight}{0.75in}
+\addtolength{\oddsidemargin}{-0.5in}
+\addtolength{\textwidth}{1in}
+%\VignetteIndexEntry{Obsolete formulas for two-phase variances}
+\usepackage{Sweave}
+\author{Thomas Lumley}
+\title{Obsolete formulas for two-phase variances}
+
+\begin{document}
+\maketitle
+This document explains the computation of variances for totals in
+two-phase designs before version 3.15, or using \texttt{method="approx"}. Since version 3.15 the variances are computed directly using a sparse-matrix representation of the covariance of sampling indicators, and agree exactly with the formulas in Section 9.3 of S\"arndal, Swensson, and Wretman.
+Variances for other statistics are computed by the
+delta-method from the variance of the total of the estimating
+functions.
+
+The variance formulas come from conditioning on the sample selected in
+the first phase
+$$\textrm{var}[\hat T]=E\left[\textrm{var}\left[\hat T|\textrm{phase 1}\right]\right]+\textrm{var}\left[E\left[\hat T|\textrm{phase 1}\right]\right]$$
+
+The first term is estimated by the variance of $\hat T$ considering
+the phase one sample as the fixed population, and so uses the same
+computations as any single-phase design. The second term is the
+variance of $\hat T$ if complete data were available for the phase-one
+sample. This takes a little more work.
+
+
+The variance computations for a stratified, clustered, multistage
+design involve recursively computing a within-stratum variance for the
+total over sampling units at the next stage. That is, we want to
+compute
+$$s^2=\frac{1}{n-1}\sum_{i=1}^n (X_i-\bar X)^2$$
+where $X_i$ are $\pi$-expanded observations, perhaps summed over sampling units.
+A natural estimator of $s^2$ when only some observations are present in the phase-two sample is
+$$\hat s^2=\frac{1}{n-1}\sum_{i=1}^n \frac{R_i}{\pi_i} (X_i-\hat{\bar X})^2$$
+where $\pi_i$ is the probability that $X_i$ is available and $R_i$ is the indicator that $X_i$ is available. We also need an estimator for $\bar X$, and a natural one is
+$$\hat{\bar X}=\frac{1}{n}\sum_{i=1}^n \frac{R_i}{\pi_i}X_i$$
+
+This is not an unbiased estimator of $s^2$ unless $\hat{\bar X}=\bar X$,
+but the bias is of order $O(n_2^{-1})$ where $n_2=\sum_i R_i$ is the
+number of phase-two observations.
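+
+As a small numerical sketch of these formulas (toy data and
+availability probabilities invented for illustration):
+<<>>=
+x <- c(1, 2, 3, 4, 5)    # phase-one values (hypothetical)
+R <- c(1, 0, 1, 1, 0)    # indicators that x is available at phase two
+p <- rep(0.6, 5)         # hypothetical availability probabilities
+n <- length(x)
+xbar.hat <- sum(R/p * x)/n
+(s2.hat <- sum(R/p * (x - xbar.hat)^2)/(n - 1))
+@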
+
+If the phase-one design involves only a single stage of sampling then
+$X_i$ is $Y_i/p_i$, where $Y_i$ is the observed value and $p_i$ is the
+phase-one sampling probability. For multistage phase-one designs (not
+yet implemented) $X_i$ will be more complicated, but still feasible to
+automate.
+
+This example shows the unbiased phase-one estimate (from Takahiro
+Tsuchiya) and the estimate I use, in a situation where the phase two
+sample is quite small.
+
+First we read the data
+\begin{verbatim}
+rei<-read.table(textConnection(
+" id N n.a h n.ah n.h sub y
+1 1 300 20 1 12 5 TRUE 1
+2 2 300 20 1 12 5 TRUE 2
+3 3 300 20 1 12 5 TRUE 3
+4 4 300 20 1 12 5 TRUE 4
+5 5 300 20 1 12 5 TRUE 5
+6 6 300 20 1 12 5 FALSE NA
+7 7 300 20 1 12 5 FALSE NA
+8 8 300 20 1 12 5 FALSE NA
+9 9 300 20 1 12 5 FALSE NA
+10 10 300 20 1 12 5 FALSE NA
+11 11 300 20 1 12 5 FALSE NA
+12 12 300 20 1 12 5 FALSE NA
+13 13 300 20 2 8 3 TRUE 6
+14 14 300 20 2 8 3 TRUE 7
+15 15 300 20 2 8 3 TRUE 8
+16 16 300 20 2 8 3 FALSE NA
+17 17 300 20 2 8 3 FALSE NA
+18 18 300 20 2 8 3 FALSE NA
+19 19 300 20 2 8 3 FALSE NA
+20 20 300 20 2 8 3 FALSE NA
+"), header=TRUE)
+\end{verbatim}
+<<echo=FALSE>>=
+rei<-read.table(textConnection(
+" id N n.a h n.ah n.h sub y
+1 1 300 20 1 12 5 TRUE 1
+2 2 300 20 1 12 5 TRUE 2
+3 3 300 20 1 12 5 TRUE 3
+4 4 300 20 1 12 5 TRUE 4
+5 5 300 20 1 12 5 TRUE 5
+6 6 300 20 1 12 5 FALSE NA
+7 7 300 20 1 12 5 FALSE NA
+8 8 300 20 1 12 5 FALSE NA
+9 9 300 20 1 12 5 FALSE NA
+10 10 300 20 1 12 5 FALSE NA
+11 11 300 20 1 12 5 FALSE NA
+12 12 300 20 1 12 5 FALSE NA
+13 13 300 20 2 8 3 TRUE 6
+14 14 300 20 2 8 3 TRUE 7
+15 15 300 20 2 8 3 TRUE 8
+16 16 300 20 2 8 3 FALSE NA
+17 17 300 20 2 8 3 FALSE NA
+18 18 300 20 2 8 3 FALSE NA
+19 19 300 20 2 8 3 FALSE NA
+20 20 300 20 2 8 3 FALSE NA
+"), header=TRUE)
+@
+
+Now, construct a two-phase design object and compute the total of \verb=y=
+<<>>=
+library(survey)
+des.rei <- twophase(id=list(~id,~id), strata=list(NULL,~h),
+ fpc=list(~N,NULL), subset=~sub, data=rei)
+tot<- svytotal(~y, des.rei)
+@
+
+The unbiased estimator is given by equation 9.4.14 of S\"arndal, Swensson, \& Wretman.
+<<>>=
+rei$w.ah <- rei$n.ah / rei$n.a
+a.rei <- aggregate(rei, by=list(rei$h), mean, na.rm=TRUE)
+a.rei$S.ysh <- tapply(rei$y, rei$h, var, na.rm=TRUE)
+a.rei$y.u <- sum(a.rei$w.ah * a.rei$y)
+a.rei$f<-with(a.rei, n.a/N)
+a.rei$delta.h<-with(a.rei, (1/n.h)*(n.a-n.ah)/(n.a-1))
+Vphase1<-with(a.rei, sum(N*N*((1-f)/n.a)*( w.ah*(1-delta.h)*S.ysh+ ((n.a)/(n.a-1))*w.ah*(y-y.u)^2)))
+@
+
+The phase-two contributions (not shown) are identical. The phase-one contributions are quite close
+<<>>=
+Vphase1
+attr(vcov(tot),"phases")$phase1
+@
+
+
+\end{document}
diff --git a/inst/doc/phase1.pdf b/inst/doc/phase1.pdf
new file mode 100644
index 0000000..5ae1ff3
Binary files /dev/null and b/inst/doc/phase1.pdf differ
diff --git a/inst/doc/pps.R b/inst/doc/pps.R
new file mode 100644
index 0000000..dc303a9
--- /dev/null
+++ b/inst/doc/pps.R
@@ -0,0 +1,53 @@
+### R code from vignette source 'pps.Rnw'
+
+###################################################
+### code chunk number 1: pps.Rnw:57-61
+###################################################
+library(survey)
+data(election)
+summary(election$p)
+summary(election_pps$p)
+
+
+###################################################
+### code chunk number 2: pps.Rnw:65-77
+###################################################
+## Hajek type
+dpps_br<- svydesign(id=~1, fpc=~p, data=election_pps, pps="brewer")
+## Horvitz-Thompson type
+dpps_ov<- svydesign(id=~1, fpc=~p, data=election_pps, pps="overton")
+dpps_hr<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40))
+dpps_hr1<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR())
+dpps_ht<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob))
+## Yates-Grundy type
+dpps_yg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob),variance="YG")
+dpps_hryg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40),variance="YG")
+## The with-replacement approximation
+dppswr <-svydesign(id=~1, probs=~p, data=election_pps)
+
+
+###################################################
+### code chunk number 3: pps.Rnw:81-82
+###################################################
+show(image(dpps_ht))
+
+
+###################################################
+### code chunk number 4: pps.Rnw:84-85
+###################################################
+show(image(dpps_ov))
+
+
+###################################################
+### code chunk number 5: pps.Rnw:91-99
+###################################################
+svytotal(~Bush+Kerry+Nader, dpps_ht)
+svytotal(~Bush+Kerry+Nader, dpps_yg)
+svytotal(~Bush+Kerry+Nader, dpps_hr)
+svytotal(~Bush+Kerry+Nader, dpps_hryg)
+svytotal(~Bush+Kerry+Nader, dpps_hr1)
+svytotal(~Bush+Kerry+Nader, dpps_br)
+svytotal(~Bush+Kerry+Nader, dpps_ov)
+svytotal(~Bush+Kerry+Nader, dppswr)
+
+
diff --git a/inst/doc/pps.Rnw b/inst/doc/pps.Rnw
new file mode 100644
index 0000000..7f9867c
--- /dev/null
+++ b/inst/doc/pps.Rnw
@@ -0,0 +1,103 @@
+\documentclass{article}
+\usepackage{url}
+\addtolength{\topmargin}{-0.5in}
+\addtolength{\textheight}{0.75in}
+\addtolength{\oddsidemargin}{-0.5in}
+\addtolength{\textwidth}{1in}
+%\VignetteIndexEntry{Analysing PPS designs}
+\usepackage{Sweave}
+\author{Thomas Lumley}
+\title{Describing PPS designs to R}
+
+\begin{document}
+\maketitle
+
+The survey package has always supported PPS (ie, arbitrary unequal probability) sampling with replacement, or using the with-replacement single-stage approximation to a multistage design. No special notation is required: just specify the correct sampling weights.
+
+Version 3.11 added another approximation for PPS sampling without replacement, and version 3.16 added more support. There are two broad classes of estimators for PPS sampling without replacement: approximations to the Horvitz--Thompson and Yates--Grundy estimators based on approximating the pairwise sampling probabilities, and estimators of H\'ajek type that attempt to recover the extra precision of a without-replacement design by conditioning on the estimated population size.
+
+\subsection*{Direct approximations}
+Using the standard recursive algorithm for stratified multistage
+sampling when one or more stages are actually PPS gives an
+approximation due to Brewer. This is simple to compute, always
+non-negative, and appears to be fairly efficient.
+
+
+
+\subsection*{Approximating $\pi_{ij}$}
+Given the pairwise sampling probabilities $\pi_{ij}$ we can define the weighted covariance of sampling indicators
+$$\check{\Delta}_{ij} = 1-\frac{\pi_i\pi_j}{\pi_{ij}}$$
+ and the weighted observations
+ $$\check{x}_i=\frac{1}{\pi_i}x_i.$$
+
+Two unbiased estimators of the variance of the total of $x$ are the Horvitz--Thompson estimator
+$$\hat V_{HT}= \sum_{i,j=1}^n \check{\Delta}_{ij}\check{x}_i\check{x}_j$$
+and the Yates--Grundy(--Sen) estimator
+$$\hat V_{YG}= -\frac{1}{2}\sum_{i,j=1}^n \check{\Delta}_{ij}(\check{x}_i-\check{x}_j)^2.$$
+The Yates--Grundy estimator appears to be preferred in most comparisons. It is always non-negative (up to rounding error, at least).
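+
+As a small numerical sketch of the two estimators (pairwise
+probabilities invented for illustration, not from a real design):
+<<>>=
+p1 <- c(0.4, 0.5)                        # first-order probabilities
+p12 <- matrix(c(0.4, 0.15,
+                0.15, 0.5), 2, 2)        # pairwise, with diagonal p1
+Delta <- 1 - outer(p1, p1)/p12           # weighted covariance matrix
+xch <- c(10, 12)/p1                      # pi-expanded observations
+sum(Delta * outer(xch, xch))             # Horvitz-Thompson estimate
+-0.5 * sum(Delta * outer(xch, xch, "-")^2)  # Yates-Grundy estimate
+@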
+
+In principle, $\pi_{ij}$ might not be available and various approximations have been proposed. The (truncated) Hartley--Rao approximation is
+$$\check{\Delta}_{ij}=1-\frac{n-\pi_i-\pi_j+\sum_{k=1}^N\pi^2_k/n}{n-1}$$
+which requires knowing $\pi_i$ for all units in the population. The population sum can be estimated from the sample, giving a further approximation
+$$\check{\Delta}_{ij}=1-\frac{n-\pi_i-\pi_j+\sum_{k=1}^n\pi_k/n}{n-1}$$
+that requires only the sample $\pi_i$. Overton's approximation is
+$$\check{\Delta}_{ij}=1-\frac{n-(\pi_i+\pi_j)/2}{n-1}$$
+which also requires only the sample $\pi_i$.
+
+In practice, given modern computing power, $\pi_{ij}$ should be available either explicitly or by simulation, so the Hartley--Rao and Overton approximations are not particularly useful.
+
+\subsection*{Using the PPS estimators}
+At the moment, only Brewer's approximation can be used as a component of multistage sampling, though for any sampling design it is possible to work out the joint sampling probabilities and use the other approaches. The other approaches can be used for cluster sampling or for sampling of individual units. This is likely to change in the future.
+
+To specify a PPS design, the sampling probabilities must be given in the \texttt{prob} argument of \texttt{svydesign}, or in the \texttt{fpc} argument, with \texttt{prob} and \texttt{weight} unspecified. In addition, it is necessary to specify which PPS computation should be used, with the \texttt{pps} argument. The optional \texttt{variance} argument specifies the Horvitz--Thompson (\texttt{variance="HT"}) or Yates--Grundy (\texttt{variance="YG"}) estimator, with the default being \texttt{variance="HT"}.
+
+Some estimators require information in addition to the sampling probabilities for units in the sample. This information is supplied to the \texttt{pps=} argument of \texttt{svydesign} using wrapper functions that create objects with appropriate classes. To specify the population sum $\sum \pi_i^2/n$ needed for the Hartley--Rao approximation, use \texttt{HR()}, and to specify a matrix of pairwise sampling probabilities use \texttt{ppsmat()}. The function \texttt{HR()} without an argument estimates this population sum from the sample.
+
+The data set \texttt{election} contains county-level voting data from the 2004 US presidential elections, with a PPS sample of size 40 taken using Till\'e's splitting method, from the \texttt{sampling} package. The sampling probabilities vary widely, with Los Angeles County having a probability of 0.9 and many small counties having probabilities less than 0.0005.
+<<>>=
+library(survey)
+data(election)
+summary(election$p)
+summary(election_pps$p)
+@
+
+Some possible survey design specifications for these data are:
+<<>>=
+## Hajek type
+dpps_br<- svydesign(id=~1, fpc=~p, data=election_pps, pps="brewer")
+## Horvitz-Thompson type
+dpps_ov<- svydesign(id=~1, fpc=~p, data=election_pps, pps="overton")
+dpps_hr<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40))
+dpps_hr1<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR())
+dpps_ht<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob))
+## Yates-Grundy type
+dpps_yg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob),variance="YG")
+dpps_hryg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40),variance="YG")
+## The with-replacement approximation
+dppswr <-svydesign(id=~1, probs=~p, data=election_pps)
+@
+
+All the without-replacement design objects except for Brewer's method include a matrix $\check{\Delta}$. These can be visualized with the \texttt{image()} method. These plots use the \texttt{lattice} package and so need \texttt{show()} to display them inside a program:
+<<fig=TRUE>>=
+show(image(dpps_ht))
+@
+<<fig=TRUE>>=
+show(image(dpps_ov))
+@
+In this example there are more negative entries in $\check{\Delta}$ with the approximate methods than when the full pairwise sampling matrix is supplied.
+
+The estimated totals are the same with all the methods, but the standard errors are not.
+
+<<>>=
+svytotal(~Bush+Kerry+Nader, dpps_ht)
+svytotal(~Bush+Kerry+Nader, dpps_yg)
+svytotal(~Bush+Kerry+Nader, dpps_hr)
+svytotal(~Bush+Kerry+Nader, dpps_hryg)
+svytotal(~Bush+Kerry+Nader, dpps_hr1)
+svytotal(~Bush+Kerry+Nader, dpps_br)
+svytotal(~Bush+Kerry+Nader, dpps_ov)
+svytotal(~Bush+Kerry+Nader, dppswr)
+@
+
+
+\end{document}
diff --git a/inst/doc/pps.pdf b/inst/doc/pps.pdf
new file mode 100644
index 0000000..e1a2c07
Binary files /dev/null and b/inst/doc/pps.pdf differ
diff --git a/inst/doc/survey.R b/inst/doc/survey.R
new file mode 100644
index 0000000..5cd73b8
--- /dev/null
+++ b/inst/doc/survey.R
@@ -0,0 +1,70 @@
+### R code from vignette source 'survey.Rnw'
+
+###################################################
+### code chunk number 1: survey.Rnw:26-29
+###################################################
+library(survey)
+data(api)
+dclus1 <- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
+
+
+###################################################
+### code chunk number 2: survey.Rnw:33-34
+###################################################
+summary(dclus1)
+
+
+###################################################
+### code chunk number 3: survey.Rnw:43-48
+###################################################
+svymean(~api00, dclus1)
+svyquantile(~api00, dclus1, quantile=c(0.25,0.5,0.75), ci=TRUE)
+svytotal(~stype, dclus1)
+svytotal(~enroll, dclus1)
+svyratio(~api.stu,~enroll, dclus1)
+
+
+###################################################
+### code chunk number 4: survey.Rnw:55-56
+###################################################
+svyratio(~api.stu, ~enroll, design=subset(dclus1, stype=="H"))
+
+
+###################################################
+### code chunk number 5: survey.Rnw:64-66
+###################################################
+vars<-names(apiclus1)[c(12:13,16:23,27:37)]
+svymean(make.formula(vars),dclus1,na.rm=TRUE)
+
+
+###################################################
+### code chunk number 6: survey.Rnw:73-74
+###################################################
+svyby(~ell+meals, ~stype, design=dclus1, svymean)
+
+
+###################################################
+### code chunk number 7: survey.Rnw:79-83
+###################################################
+regmodel <- svyglm(api00~ell+meals,design=dclus1)
+logitmodel <- svyglm(I(sch.wide=="Yes")~ell+meals, design=dclus1, family=quasibinomial())
+summary(regmodel)
+summary(logitmodel)
+
+
+###################################################
+### code chunk number 8: survey.Rnw:87-88
+###################################################
+gclus1 <- calibrate(dclus1, formula=~api99, population=c(6194, 3914069))
+
+
+###################################################
+### code chunk number 9: survey.Rnw:91-96
+###################################################
+svymean(~api00, gclus1)
+svyquantile(~api00, gclus1, quantile=c(0.25,0.5,0.75), ci=TRUE)
+svytotal(~stype, gclus1)
+svytotal(~enroll, gclus1)
+svyratio(~api.stu,~enroll, gclus1)
+
+
diff --git a/inst/doc/survey.Rnw b/inst/doc/survey.Rnw
new file mode 100644
index 0000000..14e3a5a
--- /dev/null
+++ b/inst/doc/survey.Rnw
@@ -0,0 +1,100 @@
+\documentclass{article}
+\usepackage{url}
+%\VignetteIndexEntry{A survey analysis example}
+\usepackage{Sweave}
+\author{Thomas Lumley}
+\title{A survey analysis example}
+
+\begin{document}
+\maketitle
+
+This document provides a simple example analysis of a survey data set,
+a subsample from the California Academic Performance Index, an annual
+set of tests used to evaluate California schools. The API website,
+including the original data files are at
+\url{http://api.cde.ca.gov}. The subsample was generated as a teaching
+example by Academic Technology Services at UCLA and was obtained from
+\url{http://www.ats.ucla.edu/stat/stata/Library/svy_survey.htm}.
+
+
+We have a cluster sample in which 15 school districts were sampled and
+then all schools in each district. This is in the data frame
+\texttt{apiclus1}, loaded with \texttt{data(api)}. The cluster sample is
+defined by the sampling unit (\texttt{dnum}) and the population
+size (\texttt{fpc}). Sampling weights are computed from the population
+sizes, but could be provided separately.
+<<>>=
+library(survey)
+data(api)
+dclus1 <- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
+@
+
+The \texttt{svydesign} function returns an object containing the survey data and metadata.
+<<>>=
+summary(dclus1)
+@
+
+We can compute summary statistics to estimate the mean, median, and
+quartiles of the Academic Performance Index in the year 2000, the
+number of elementary, middle, and high schools in the state, the total
+number of students, and the proportion who took the test. Each
+function takes a formula object describing the variables and a survey
+design object containing the data.
+<<>>=
+svymean(~api00, dclus1)
+svyquantile(~api00, dclus1, quantile=c(0.25,0.5,0.75), ci=TRUE)
+svytotal(~stype, dclus1)
+svytotal(~enroll, dclus1)
+svyratio(~api.stu,~enroll, dclus1)
+@
+
+The ordinary R subsetting functions \verb'[' and \texttt{subset} work
+correctly on these survey objects, carrying along the metadata needed
+for valid standard errors. Here we compute the proportion of high
+school students who took the test
+<<>>=
+svyratio(~api.stu, ~enroll, design=subset(dclus1, stype=="H"))
+@
+
+The warnings referred to in the output occurred because several
+school districts have only one high school sampled, making the second
+stage standard error estimation unreliable.
+
+Specifying a large number of variables is made easier by the \texttt{make.formula} function
+<<>>=
+vars<-names(apiclus1)[c(12:13,16:23,27:37)]
+svymean(make.formula(vars),dclus1,na.rm=TRUE)
+@
+
+Summary statistics for subsets can also be computed with
+\texttt{svyby}. Here we compute the average proportion of ``English
+language learners'' and of students eligible for subsidized school
+meals for elementary, middle, and high schools
+<<>>=
+svyby(~ell+meals, ~stype, design=dclus1, svymean)
+@
+
+
+Regression models show that these socioeconomic variables predict API score and whether the school achieved its API target
+<<>>=
+regmodel <- svyglm(api00~ell+meals,design=dclus1)
+logitmodel <- svyglm(I(sch.wide=="Yes")~ell+meals, design=dclus1, family=quasibinomial())
+summary(regmodel)
+summary(logitmodel)
+@
+
+We can calibrate the sampling using the statewide total for the previous year's API
+<<>>=
+gclus1 <- calibrate(dclus1, formula=~api99, population=c(6194, 3914069))
+@
+which improves estimation of some quantities
+<<>>=
+svymean(~api00, gclus1)
+svyquantile(~api00, gclus1, quantile=c(0.25,0.5,0.75), ci=TRUE)
+svytotal(~stype, gclus1)
+svytotal(~enroll, gclus1)
+svyratio(~api.stu,~enroll, gclus1)
+@
+
+
+\end{document}
diff --git a/inst/doc/survey.pdf b/inst/doc/survey.pdf
new file mode 100644
index 0000000..ff3f762
Binary files /dev/null and b/inst/doc/survey.pdf differ
diff --git a/inst/porting.to.S b/inst/porting.to.S
new file mode 100755
index 0000000..bfd1297
--- /dev/null
+++ b/inst/porting.to.S
@@ -0,0 +1,4 @@
+Version 3.6-12 is available for S-PLUS 8.0, ported by Patrick Aboyoun,
+who was then at what was then Insightful. Comparing this to the R version
+3.6-12 should help if you want to port more recent versions.
+
diff --git a/inst/twostage.pdf b/inst/twostage.pdf
new file mode 100644
index 0000000..f5c45f6
Binary files /dev/null and b/inst/twostage.pdf differ
diff --git a/inst/ucla-examples.pdf b/inst/ucla-examples.pdf
new file mode 100644
index 0000000..051db9b
Binary files /dev/null and b/inst/ucla-examples.pdf differ
diff --git a/man/HR.Rd b/man/HR.Rd
new file mode 100644
index 0000000..efe373c
--- /dev/null
+++ b/man/HR.Rd
@@ -0,0 +1,37 @@
+\name{HR}
+\Rdversion{1.1}
+\alias{HR}
+\alias{ppsmat}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Wrappers for specifying PPS designs}
+\description{
+The Horvitz-Thompson estimator and the Hartley-Rao approximation require information in addition to the sampling probabilities for sampled individuals. These functions allow this information to be supplied.
+}
+\usage{
+HR(psum=NULL, strata = NULL)
+ppsmat(jointprob, tolerance = 1e-04)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{psum}{ The sum of squared sampling probabilities for the population, divided by the sample size, as a single number or as a vector for stratified sampling
+}
+ \item{strata}{
+Stratum labels, of the same length as \code{psum}, if \code{psum} is a vector
+}
+\item{jointprob}{Matrix of pairwise sampling probabilities for the sampled individuals}
+\item{tolerance}{Tolerance for deciding that the covariance of sampling indicators is zero}
+}
+\value{
+An object of class \code{HR} or \code{ppsmat}, suitable for supplying as the \code{pps} argument to \code{\link{svydesign}}.
+}
+
+\seealso{
+\link{election} for examples of PPS designs
+}
+\examples{
+HR(0.1)
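+
+## A sketch of HR() supplying the Hartley-Rao pps argument to
+## svydesign(), as in the election examples (see help(election))
+data(election)
+dpps_hr <- svydesign(id=~1, fpc=~p, data=election_pps,
+                     pps=HR(sum(election$p^2)/40))
+svytotal(~Bush+Kerry+Nader, dpps_hr)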
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/SE.Rd b/man/SE.Rd
new file mode 100755
index 0000000..a6053e7
--- /dev/null
+++ b/man/SE.Rd
@@ -0,0 +1,27 @@
+\name{SE}
+\alias{SE}
+\alias{SE.default}
+\alias{SE.svrepstat}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Extract standard errors }
+\description{
+Extracts standard errors from an object. The default method is for
+objects with a \code{\link{vcov}} method.
+}
+\usage{
+SE(object, ...)
+\method{SE}{default}(object,...)
+\method{SE}{svrepstat}(object,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{object}{An object}
+ \item{\dots}{Arguments for future expansion }
+}
+\value{
+ Vector of standard errors.
+}
+\seealso{ \code{\link{vcov}}}
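+
+\examples{
+## A minimal sketch: extract standard errors from a survey estimate
+data(api)
+dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+(m <- svymean(~api00, dclus1))
+SE(m)
+}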
+
+\keyword{models}% at least one, from doc/KEYWORDS
+
diff --git a/man/anova.svyglm.Rd b/man/anova.svyglm.Rd
new file mode 100644
index 0000000..bd9a305
--- /dev/null
+++ b/man/anova.svyglm.Rd
@@ -0,0 +1,99 @@
+\name{anova.svyglm}
+\alias{anova.svyglm}
+\alias{AIC.svyglm}
+\alias{BIC.svyglm}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Model comparison for glms.
+}
+\description{
+A method for the \code{\link{anova}} function, for use on \code{\link{svyglm}} objects. With a single model argument it produces a sequential anova table, with two arguments it compares the two models.
+}
+\usage{
+\method{anova}{svyglm}(object, object2 = NULL, test = c("F", "Chisq"),
+ method = c("LRT", "Wald"), tolerance = 1e-05, ..., force = FALSE)
+\method{AIC}{svyglm}(object,...,k=2)
+\method{BIC}{svyglm}(object,...,maximal)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{object}{
+ A \code{\link{svyglm}} object.
+}
+ \item{object2}{
+ Optionally, another \code{\link{svyglm}} object.
+}
+ \item{test}{
+ Use (linear combination of) F or chi-squared distributions for p-values. F is usually preferable.
+}
+ \item{method}{
+ Use weighted deviance difference (LRT) or Wald tests to compare models
+}
+ \item{tolerance}{
+ For models that are not symbolically nested, the tolerance for deciding that a term is common to the models.
+}
+ \item{\dots}{
+ For \code{AIC} and \code{BIC}, optionally more \code{svyglm} objects
+}
+ \item{force}{
+ Force the tests to be done by explicit projection even if the models are symbolically nested (for debugging)
+}
+\item{maximal}{A \code{svyglm} model that \code{object} (and \dots if supplied) are nested in.}
+\item{k}{Multiplier for effective df in AIC. Usually 2. There is no choice of \code{k} that will give BIC}
+}
+\details{
+The reference distribution for the LRT depends on the misspecification effects for the parameters being tested (Rao and Scott, 1984). If the models are symbolically nested, so that the relevant parameters can be identified just by manipulating the model formulas, \code{anova} is equivalent to \code{\link{regTermTest}}. If the models are nested but not symbolically nested, more computation using the design matrices is needed to determine the projection matrix on to the parameters being tested.
+
+The saddlepoint approximation is used for the LRT with numerator df greater than 1.
+
+\code{AIC} is defined using the Rao-Scott approximation to the weighted loglikelihood. It replaces the usual penalty term p, which is the null expectation of the log likelihood ratio, by the trace of the generalised design effect matrix, which is the expectation under complex sampling. For computational reasons everything is scaled so the weights sum to the sample size.
+
+\code{BIC} is a BIC for the (approximate) multivariate Gaussian models on regression coefficients from the maximal model implied by each submodel (ie, the models that say some coefficients in the maximal model are zero). It corresponds to comparing the models with a Wald test and replacing the sample size in the penalty by an effective sample size.
+For computational reasons, the models must not only be nested, the names of the coefficients must match.
+
+}
+\value{
+Object of class \code{seqanova.svyglm} if one model is given, otherwise of class \code{regTermTest} or \code{regTermTestLRT}
+}
+\note{
+At the moment, \code{AIC} works only for models including an intercept.
+ }
+\references{
+
+Rao, JNK, Scott, AJ (1984) "On Chi-squared Tests For Multiway Contingency Tables with Proportions Estimated From Survey Data" Annals of Statistics 12:46-60.
+
+Lumley, T., & Scott, A. (2014). Tests for Regression Models Fitted to Survey Data. Australian and New Zealand Journal of Statistics, 56 (1), 1-14.
+
+Lumley T, Scott AJ (forthcoming) "AIC and BIC for modelling with complex survey data"
+
+}
+
+\seealso{
+\code{\link{regTermTest}}, \code{\link{pchisqsum}}
+}
+\examples{
+data(api)
+dclus2<-svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
+
+model0<-svyglm(I(sch.wide=="Yes")~ell+meals+mobility, design=dclus2, family=quasibinomial())
+model1<-svyglm(I(sch.wide=="Yes")~ell+meals+mobility+as.numeric(stype),
+ design=dclus2, family=quasibinomial())
+model2<-svyglm(I(sch.wide=="Yes")~ell+meals+mobility+stype, design=dclus2, family=quasibinomial())
+
+anova(model2)
+anova(model0,model2)
+anova(model1, model2)
+
+anova(model1, model2, method="Wald")
+
+AIC(model0,model1, model2)
+BIC(model0, model2,maximal=model2)
+
+
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{regression}% __ONLY ONE__ keyword per line
diff --git a/man/api.Rd b/man/api.Rd
new file mode 100755
index 0000000..3a55e42
--- /dev/null
+++ b/man/api.Rd
@@ -0,0 +1,132 @@
+\name{api}
+\alias{api}
+\alias{apipop}
+\alias{apiclus1}
+\alias{apiclus2}
+\alias{apistrat}
+\alias{apisrs}
+\docType{data}
+\title{Student performance in California schools}
+\description{
+The Academic Performance Index is computed for all California schools
+based on standardised testing of students. The data sets contain
+information for all schools with at least 100 students and for various
+probability samples of the data.
+}
+\usage{
+data(api)
+}
+\format{
+ The full population data in \code{apipop} are a data frame with 6194 observations on the following 37 variables.
+ \describe{
+ \item{cds}{Unique identifier}
+ \item{stype}{Elementary/Middle/High School}
+ \item{name}{School name (15 characters)}
+ \item{sname}{School name (40 characters)}
+ \item{snum}{School number}
+ \item{dname}{District name}
+ \item{dnum}{District number}
+ \item{cname}{County name}
+ \item{cnum}{County number}
+ \item{flag}{reason for missing data}
+ \item{pcttest}{percentage of students tested}
+ \item{api00}{API in 2000}
+ \item{api99}{API in 1999}
+ \item{target}{target for change in API}
+ \item{growth}{Change in API}
+ \item{sch.wide}{Met school-wide growth target?}
+ \item{comp.imp}{Met Comparable Improvement target}
+ \item{both}{Met both targets}
+ \item{awards}{Eligible for awards program}
+ \item{meals}{Percentage of students eligible for subsidized meals}
+ \item{ell}{`English Language Learners' (percent)}
+ \item{yr.rnd}{Year-round school}
+ \item{mobility}{percentage of students for whom this is the first
+ year at the school}
+ \item{acs.k3}{average class size years K-3}
+ \item{acs.46}{average class size years 4-6}
+ \item{acs.core}{Number of core academic courses}
+ \item{pct.resp}{percent where parental education level is known}
+ \item{not.hsg}{percent parents not high-school graduates}
+ \item{hsg}{percent parents who are high-school graduates}
+ \item{some.col}{percent parents with some college}
+ \item{col.grad}{percent parents with college degree}
+ \item{grad.sch}{percent parents with postgraduate education}
+ \item{avg.ed}{average parental education level}
+ \item{full}{percent fully qualified teachers}
+ \item{emer}{percent teachers with emergency qualifications}
+ \item{enroll}{number of students enrolled}
+ \item{api.stu}{number of students tested.}
+ }
+ The other data sets contain additional variables \code{pw} for
+ sampling weights and \code{fpc} to compute finite population
+ corrections to variance.
+}
+\details{
+ \code{apipop} is the entire population, \code{apisrs} is a simple random sample,
+ \code{apiclus1} is a cluster sample of school districts, \code{apistrat} is
+ a sample stratified by \code{stype}, and \code{apiclus2} is a two-stage
+ cluster sample of schools within districts. The sampling weights in
+ \code{apiclus1} are incorrect (the weight should be 757/15) but are as
+ obtained from UCLA.
+}
+\source{
+ Data were obtained from the survey sampling help pages of UCLA
+ Academic Technology Services, at \url{http://www.ats.ucla.edu/stat/stata/Library/svy_survey.htm}.
+}
+\references{
+ The API program and original data files are at \url{http://api.cde.ca.gov/}
+}
+\examples{
+library(survey)
+data(api)
+mean(apipop$api00)
+sum(apipop$enroll, na.rm=TRUE)
+
+#stratified sample
+dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+summary(dstrat)
+svymean(~api00, dstrat)
+svytotal(~enroll, dstrat, na.rm=TRUE)
+
+# one-stage cluster sample
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+summary(dclus1)
+svymean(~api00, dclus1)
+svytotal(~enroll, dclus1, na.rm=TRUE)
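+
+# sketch: the Details section notes the correct weight would be
+# 757/15 per school (the pw weights above are as obtained from UCLA)
+dclus1c<-svydesign(id=~dnum, weights=rep(757/15, nrow(apiclus1)),
+  data=apiclus1, fpc=~fpc)
+svymean(~api00, dclus1c)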
+
+# two-stage cluster sample
+dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)
+summary(dclus2)
+svymean(~api00, dclus2)
+svytotal(~enroll, dclus2, na.rm=TRUE)
+
+# two-stage `with replacement'
+dclus2wr<-svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
+summary(dclus2wr)
+svymean(~api00, dclus2wr)
+svytotal(~enroll, dclus2wr, na.rm=TRUE)
+
+
+# convert to replicate weights
+rclus1<-as.svrepdesign(dclus1)
+summary(rclus1)
+svymean(~api00, rclus1)
+svytotal(~enroll, rclus1, na.rm=TRUE)
+
+# post-stratify on school type
+pop.types<-xtabs(~stype, data=apipop)
+
+rclus1p<-postStratify(rclus1, ~stype, pop.types)
+dclus1p<-postStratify(dclus1, ~stype, pop.types)
+summary(dclus1p)
+summary(rclus1p)
+
+svymean(~api00, dclus1p)
+svytotal(~enroll, dclus1p, na.rm=TRUE)
+
+svymean(~api00, rclus1p)
+svytotal(~enroll, rclus1p, na.rm=TRUE)
+
+}
+\keyword{datasets}
diff --git a/man/as.fpc.Rd b/man/as.fpc.Rd
new file mode 100644
index 0000000..7869d1e
--- /dev/null
+++ b/man/as.fpc.Rd
@@ -0,0 +1,35 @@
+\name{as.fpc}
+\alias{as.fpc}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Package sample and population size data}
+\description{
+This function creates an object to store the number of clusters sampled
+within each stratum (at each stage of multistage sampling) and the
+number of clusters available in the population. It is called by
+\code{svydesign}, not directly by the user.
+}
+\usage{
+as.fpc(df, strata, ids,pps=FALSE)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{df}{A data frame or matrix with population size information}
+ \item{strata}{A data frame giving strata at each stage}
+ \item{ids}{A data frame giving cluster ids at each stage}
+ \item{pps}{if \code{TRUE}, fpc information may vary within a stratum
+ and must be specified as a proportion rather than a population size}
+}
+\details{
+ The population size information may be specified as the number of
+ clusters in the population or as the proportion of clusters sampled.
+
+}
+\value{
+ An object of class \code{survey_fpc}
+}
+
+
+\seealso{\code{\link{svydesign}},\code{\link{svyrecvar}}}
+
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/as.svrepdesign.Rd b/man/as.svrepdesign.Rd
new file mode 100755
index 0000000..6736489
--- /dev/null
+++ b/man/as.svrepdesign.Rd
@@ -0,0 +1,100 @@
+\name{as.svrepdesign}
+\alias{as.svrepdesign}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Convert a survey design to use replicate weights}
+\description{
+ Creates a replicate-weights survey design object from a traditional
+ strata/cluster survey design object. \code{JK1} and \code{JKn} are
+ jackknife methods, \code{BRR} is Balanced Repeated Replicates and
+ \code{Fay} is Fay's modification of this, \code{bootstrap} is Canty
+ and Davison's bootstrap, \code{subbootstrap} is Rao and Wu's
+ \eqn{(n-1)} bootstrap, and \code{mrbbootstrap} is Preston's multistage rescaled bootstrap.
+}
+\usage{
+as.svrepdesign(design, type=c("auto", "JK1", "JKn", "BRR", "bootstrap",
+ "subbootstrap","mrbbootstrap","Fay"),
+ fay.rho = 0, fpc=NULL,fpctype=NULL,..., compress=TRUE,
+ mse=getOption("survey.replicates.mse"))
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{design}{Object of class \code{survey.design} }
+ \item{type}{Type of replicate weights. \code{"auto"} uses JKn for
+ stratified, JK1 for unstratified designs}
+ \item{fay.rho}{Tuning parameter for Fay's variance method }
+ \item{fpc,fpctype,\dots}{Passed to \code{jk1weights}, \code{jknweights},
+ \code{brrweights}, \code{bootweights}, \code{subbootweights}, or \code{mrbweights}.}
+ \item{compress}{Use a compressed representation of the replicate
+ weights matrix.}
+ \item{mse}{if \code{TRUE}, compute variances from sums of squares around
+ the point estimate, rather than the mean of the replicates}
+}
+
+\value{
+ Object of class \code{svyrep.design}.
+}
+\references{
+ Canty AJ, Davison AC. (1999) Resampling-based variance
+ estimation for labour force surveys. The Statistician 48:379-391
+
+ Judkins, D. (1990), "Fay's Method for Variance Estimation," Journal of Official Statistics, 6, 223-239.
+
+ Preston J. (2009) Rescaled bootstrap for stratified multistage sampling. Survey Methodology 35(2) 227-234
+
+ Rao JNK, Wu CFJ. Bootstrap inference for sample surveys. Proc Section
+ on Survey Research Methodology. 1993 (866--871)
+}
+
+\seealso{\code{\link{brrweights}}, \code{\link{svydesign}},
+ \code{\link{svrepdesign}}, \code{\link{bootweights}}, \code{\link{subbootweights}}, \code{\link{mrbweights}}
+}
+
+\examples{
+data(scd)
+scddes<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
+nest=TRUE, fpc=rep(5,6))
+scdnofpc<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
+nest=TRUE)
+
+# convert to BRR replicate weights
+scd2brr <- as.svrepdesign(scdnofpc, type="BRR")
+scd2fay <- as.svrepdesign(scdnofpc, type="Fay",fay.rho=0.3)
+# convert to JKn weights
+scd2jkn <- as.svrepdesign(scdnofpc, type="JKn")
+
+# convert to JKn weights with finite population correction
+scd2jknf <- as.svrepdesign(scddes, type="JKn")
+
+## with user-supplied hadamard matrix
+scd2brr1 <- as.svrepdesign(scdnofpc, type="BRR", hadamard.matrix=paley(11))
+
+svyratio(~alive, ~arrests, design=scd2brr)
+svyratio(~alive, ~arrests, design=scd2brr1)
+svyratio(~alive, ~arrests, design=scd2fay)
+svyratio(~alive, ~arrests, design=scd2jkn)
+svyratio(~alive, ~arrests, design=scd2jknf)
+
+data(api)
+## one-stage cluster sample
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+## convert to JK1 jackknife
+rclus1<-as.svrepdesign(dclus1)
+## convert to bootstrap
+bclus1<-as.svrepdesign(dclus1,type="bootstrap", replicates=100)
+
+svymean(~api00, dclus1)
+svytotal(~enroll, dclus1)
+
+svymean(~api00, rclus1)
+svytotal(~enroll, rclus1)
+
+svymean(~api00, bclus1)
+svytotal(~enroll, bclus1)
+
+dclus2<-svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
+mrbclus2<-as.svrepdesign(dclus2, type="mrb",replicates=100)
+svytotal(~api00+stype, dclus2)
+svytotal(~api00+stype, mrbclus2)
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+
diff --git a/man/as.svydesign2.Rd b/man/as.svydesign2.Rd
new file mode 100755
index 0000000..f925f61
--- /dev/null
+++ b/man/as.svydesign2.Rd
@@ -0,0 +1,33 @@
+\name{as.svydesign2}
+\alias{as.svydesign2}
+\alias{.svycheck}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Update to the new survey design format}
+\description{
+The structure of survey design objects changed in version 2.9, to allow
+standard errors based on multistage sampling. \code{as.svydesign2} converts an
+object to the new structure and \code{.svycheck} warns if an object
+does not have the new structure.
+
+You can set \code{options(survey.want.obsolete=TRUE)} to suppress the
+warnings produced by \code{.svycheck} and
+\code{options(survey.ultimate.cluster=TRUE)} to always compute
+variances based on just the first stage of sampling.
+}
+\usage{
+as.svydesign2(object)
+.svycheck(object)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{object}{produced by \code{svydesign}}
+}
+
+\value{
+Object of class \code{survey.design2}
+}
+
+\seealso{\code{\link{svydesign}}, \code{\link{svyrecvar}}}
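+
+\examples{
+## A sketch: `olddesign' stands in for a design object created by a
+## pre-2.9 version of the package (hypothetical, hence not run)
+\dontrun{
+newdesign <- as.svydesign2(olddesign)
+.svycheck(newdesign)
+}
+}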
+
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/barplot.svystat.Rd b/man/barplot.svystat.Rd
new file mode 100644
index 0000000..d2a0083
--- /dev/null
+++ b/man/barplot.svystat.Rd
@@ -0,0 +1,51 @@
+\name{barplot.svystat}
+\alias{barplot.svystat}
+\alias{barplot.svrepstat}
+\alias{barplot.svyby}
+\alias{dotchart}
+\alias{dotchart.svystat}
+\alias{dotchart.svrepstat}
+\alias{dotchart.svyby}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Barplots and Dotplots }
+\description{
+Draws a barplot or dotplot based on results from a survey analysis. The default
+barplot method already works for results from \code{\link{svytable}}.
+}
+\usage{
+\method{barplot}{svystat}(height, ...)
+\method{barplot}{svrepstat}(height, ...)
+\method{barplot}{svyby}(height,beside=TRUE, ...)
+
+\method{dotchart}{svystat}(x,...,pch=19)
+\method{dotchart}{svrepstat}(x,...,pch=19)
+\method{dotchart}{svyby}(x,...,pch=19)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{height,x}{Analysis result }
+ \item{beside}{Grouped, rather than stacked, bars}
+ \item{\dots}{ Arguments to \code{\link{barplot}} or \code{dotchart} }
+ \item{pch}{Overrides the default in \code{dotchart.default}}
+}
+
+
+\examples{
+
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+a<-svymean(~stype, dclus1)
+barplot(a)
+barplot(a, names.arg=c("Elementary","High","Middle"), col="purple",
+ main="Proportions of school level")
+
+b<-svyby(~enroll+api.stu, ~stype, dclus1, svymean)
+barplot(b,beside=TRUE,legend=TRUE)
+dotchart(b)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{hplot}% __ONLY ONE__ keyword per line
diff --git a/man/bootweights.Rd b/man/bootweights.Rd
new file mode 100644
index 0000000..5dd3c13
--- /dev/null
+++ b/man/bootweights.Rd
@@ -0,0 +1,76 @@
+\name{bootweights}
+\alias{bootweights}
+\alias{subbootweights}
+\alias{mrbweights}
+\alias{bootstratum}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Compute survey bootstrap weights }
+\description{
+Bootstrap weights for infinite populations ('with replacement' sampling) are created by sampling with
+replacement from the PSUs in each stratum. \code{subbootweights()}
+samples \code{n-1} PSUs from the \code{n} available (Rao and Wu),
+\code{bootweights} samples \code{n} (Canty and Davison).
+
+For multistage designs or those with large sampling fractions,
+\code{mrbweights} implements Preston's multistage rescaled
+bootstrap. The multistage rescaled bootstrap is still useful for
+single-stage designs with small sampling fractions, where it reduces
+to a half-sample replicate method.
+}
+\usage{
+bootweights(strata, psu, replicates = 50, fpc = NULL,
+ fpctype = c("population", "fraction", "correction"),
+ compress = TRUE)
+subbootweights(strata, psu, replicates = 50, compress = TRUE)
+mrbweights(clusters, stratas, fpcs, replicates=50,
+ multicore=getOption("survey.multicore"))
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{strata}{Identifier for sampling strata (top level only)}
+ \item{stratas}{data frame of strata for all stages of sampling}
+ \item{psu}{Identifier for primary sampling units}
+ \item{clusters}{data frame of identifiers for sampling units at each stage}
+ \item{replicates}{Number of bootstrap replicates}
+ \item{fpc}{Finite population correction (top level only) }
+ \item{fpctype}{Is \code{fpc} the population size, sampling fraction,
+ or 1-sampling fraction?}
+ \item{fpcs}{\code{survey_fpc} object with population and sample size at each stage}
+ \item{compress}{Should the replicate weights be compressed?}
+ \item{multicore}{Use the \code{multicore} package to generate the replicates in parallel}
+}
+
+\value{
+ A set of replicate weights
+}
+
+\section{warning}{With \code{multicore=TRUE} the resampling procedure does not
+use the current random seed, so the results cannot be exactly
+reproduced even by using \code{set.seed()}}
+
+\note{
+These bootstraps are strictly appropriate only when the first stage of
+sampling is a simple or stratified random sample of PSUs with or
+without replacement, and not (eg) for PPS sampling. The functions
+will not enforce simple random sampling, so they can be used
+(approximately) for data that have had non-response corrections and
+other weight adjustments. It is preferable to apply these adjustments
+after creating the bootstrap replicate weights, but that may not be
+possible with public-use data.
+
+}
+\references{Canty AJ, Davison AC. (1999) Resampling-based variance
+ estimation for labour force surveys. The Statistician 48:379-391
+
+ Judkins, D. (1990), "Fay's Method for Variance Estimation" Journal of Official Statistics, 6, 223-239.
+
+ Preston J. (2009) Rescaled bootstrap for stratified multistage sampling. Survey Methodology 35(2) 227-234
+
+ Rao JNK, Wu CFJ. Bootstrap inference for sample surveys. Proc Section
+ on Survey Research Methodology. 1993 (866--871)
+}
+
+\seealso{\code{\link{as.svrepdesign}}}
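+
+\examples{
+## Usually called via as.svrepdesign(); a sketch:
+data(api)
+dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+bclus1 <- as.svrepdesign(dclus1, type="bootstrap", replicates=100)
+svymean(~api00, bclus1)
+}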
+
+\keyword{survey}% at least one, from doc/KEYWORDS
+
diff --git a/man/brrweights.Rd b/man/brrweights.Rd
new file mode 100755
index 0000000..720c9e3
--- /dev/null
+++ b/man/brrweights.Rd
@@ -0,0 +1,125 @@
+\name{brrweights}
+\alias{jk1weights}
+\alias{jknweights}
+\alias{brrweights}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Compute replicate weights }
+\description{
+ Compute replicate weights from a survey design. These functions are
+ usually called from \code{\link{as.svrepdesign}} rather than directly
+ by the user.
+}
+\usage{
+brrweights(strata, psu, match = NULL,
+ small = c("fail","split","merge"),
+ large = c("split", "merge", "fail"),
+ fay.rho=0, only.weights=FALSE,
+ compress=TRUE, hadamard.matrix=NULL)
+jk1weights(psu,fpc=NULL,
+ fpctype=c("population","fraction","correction"),
+ compress=TRUE)
+jknweights(strata,psu, fpc=NULL,
+ fpctype=c("population","fraction","correction"),
+ compress=TRUE,
+ lonely.psu=getOption("survey.lonely.psu"))
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{strata}{Stratum identifiers }
+ \item{psu}{PSU (cluster) identifier }
+ \item{match}{Optional variable to use in matching. }
+ \item{small}{How to handle strata with only one PSU}
+ \item{large}{How to handle strata with more than two PSUs}
+ \item{fpc}{Optional population (stratum) size or finite population correction }
+ \item{fpctype}{How \code{fpc} is coded.}
+ \item{fay.rho}{Parameter for Fay's extended BRR method}
+ \item{only.weights}{If \code{TRUE} return only the matrix of
+ replicate weights}
+ \item{compress}{If \code{TRUE}, store the replicate weights in
+ compressed form}
+ \item{hadamard.matrix}{Optional user-supplied Hadamard matrix for
+ \code{brrweights}}
+ \item{lonely.psu}{Handling of non-certainty single-PSU strata}
+}
+\details{
+ JK1 and JKn are jackknife schemes for unstratified and stratified
+ designs respectively. The finite population correction may be
+ specified as a single number, a vector with one entry per stratum, or
+ a vector with one entry per observation (constant within strata).
+ When \code{fpc} is a vector with one entry per stratum it may not have
+ names that differ from the stratum identifiers (it may have no names,
+ in which case it must be in the same order as
+ \code{unique(strata)}). To specify population stratum sizes use
+ \code{fpctype="population"}, to specify sampling fractions use
+ \code{fpctype="fraction"} and to specify the correction directly use
+ \code{fpctype="correction"}
+
+ The only reason not to use \code{compress=TRUE} is that it is new and
+ there is a greater possibility of bugs. It reduces the number of
+ rows of the replicate weights matrix from the number of observations
+ to the number of PSUs.
+
+ In BRR variance estimation each stratum is split in two to give
+ half-samples. Balanced replicated weights are needed, where
+ observations in two different strata end up in the same half stratum
+ as often as in different half-strata. BRR, strictly speaking, is
+ defined only when each stratum has exactly
+ two PSUs. A stratum with one PSU can be merged with another such
+ stratum, or can be split to appear in both half samples with half
+ weight. The latter approach is appropriate for a PSU that was
+ deterministically sampled.
+
+ A stratum with more than two PSUs can be split into multiple smaller
+ strata each with two PSUs or the PSUs can be merged to give two
+ superclusters within the stratum.
+
+ When merging small strata or grouping PSUs in large strata the
+ \code{match} variable is used to sort PSUs before merging, to give
+ approximate matching on this variable.
+
+ If you want more control than this you should probably construct your
+ own weights using the Hadamard matrices produced by \code{\link{hadamard}}
+
+
+}
+\value{
+For \code{brrweights} with \code{only.weights=FALSE} a list with elements
+ \item{weights}{two-column matrix indicating the weight for each
+ half-stratum in one particular set of split samples}
+ \item{wstrata}{New stratum variable incorporating merged or split strata}
+ \item{strata}{Original strata for distinct PSUs}
+ \item{psu}{Distinct PSUs}
+ \item{npairs}{Dimension of Hadamard matrix used in BRR construction}
+ \item{sampler}{function returning replicate weights}
+ \item{compress}{Indicates whether the \code{sampler} returns per PSU
+ or per observation weights}
+ For \code{jk1weights} and \code{jknweights} a data frame of replicate
+ weights and the \code{scale} and \code{rscale} arguments to \code{\link{svrVar}}.
+}
+\references{Levy and Lemeshow "Sampling of Populations". Wiley.
+
+ Shao and Tu "The Jackknife and Bootstrap". Springer.
+}
+
+\seealso{\code{\link{hadamard}}, \code{\link{as.svrepdesign}},
+ \code{\link{svrVar}}, \code{\link{surveyoptions}}}
+
+\examples{
+data(scd)
+scdnofpc<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
+nest=TRUE)
+
+## convert to BRR replicate weights
+scd2brr <- as.svrepdesign(scdnofpc, type="BRR")
+svymean(~alive, scd2brr)
+svyratio(~alive, ~arrests, scd2brr)
+
+## with user-supplied hadamard matrix
+scd2brr1 <- as.svrepdesign(scdnofpc, type="BRR", hadamard.matrix=paley(11))
+svymean(~alive, scd2brr1)
+svyratio(~alive, ~arrests, scd2brr1)
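+
+## Direct use of jknweights (a sketch): population stratum sizes as
+## fpc, with names matching the stratum identifiers, one school per PSU
+data(api)
+jkn <- jknweights(apistrat$stype, 1:nrow(apistrat),
+  fpc=c(E=4421, H=755, M=1018))
+names(jkn)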
+
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+
diff --git a/man/calibrate.Rd b/man/calibrate.Rd
new file mode 100644
index 0000000..f3df8c0
--- /dev/null
+++ b/man/calibrate.Rd
@@ -0,0 +1,318 @@
+\name{calibrate}
+\alias{calibrate.survey.design2}
+\alias{calibrate.svyrep.design}
+%\alias{is.calibrated}
+\alias{calibrate}
+%\alias{regcalibrate.survey.design2}
+%\alias{regcalibrate.svyrep.design}
+%\alias{regcalibrate}
+\alias{calibrate.twophase}
+\alias{grake}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Calibration (GREG) estimators}
+\description{
+ Calibration, generalized raking, or GREG estimators generalise post-stratification and
+ raking by calibrating a sample to the marginal totals of
+ variables in a linear regression model. This function reweights the
+ survey design and adds additional information that is used by
+ \code{svyrecvar} to reduce the estimated standard errors.
+}
+\usage{
+calibrate(design,...)
+\method{calibrate}{survey.design2}(design, formula, population,
+ aggregate.stage=NULL, stage=0, variance=NULL,
+ bounds=c(-Inf,Inf), calfun=c("linear","raking","logit"),
+ maxit=50,epsilon=1e-7,verbose=FALSE,force=FALSE,trim=NULL,...)
+\method{calibrate}{svyrep.design}(design, formula, population,compress=NA,
+ aggregate.index=NULL, variance=NULL, bounds=c(-Inf,Inf),
+ calfun=c("linear","raking","logit"),
+ maxit=50, epsilon=1e-7, verbose=FALSE,force=FALSE,trim=NULL, ...)
+\method{calibrate}{twophase}(design, phase=2,formula, population,
+ calfun=c("linear","raking","logit","rrz"),...)
+grake(mm,ww,calfun,eta=rep(0,NCOL(mm)),bounds,population,epsilon, verbose,maxit)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{design}{survey design object}
+ \item{formula}{model formula for calibration model, or list of
+ formulas for each margin}
+ \item{population}{Vectors of population column totals for the model matrix in the
+ calibration model, or list of such vectors for each
+ cluster, or list of tables for each margin. Required except for two-phase designs}
+ \item{compress}{compress the resulting replicate weights if
+ \code{TRUE} or if \code{NA} and weights were previously compressed}
+ \item{stage}{See Details below}
+ \item{variance}{Coefficients for variance in calibration model (see
+ Details below)}
+ \item{aggregate.stage}{An integer. If not \code{NULL}, make calibration weights
+ constant within sampling units at this stage.}
+ \item{aggregate.index}{A vector or one-sided formula. If not \code{NULL}, make calibration weights
+ constant within levels of this variable}
+ \item{bounds}{Bounds for the calibration weights, optional
+ except for \code{calfun="logit"}}
+ \item{trim}{Weights outside this range will be trimmed to these bounds.}
+ \item{...}{options for other methods}
+ \item{calfun}{Calibration function: see below}
+ \item{maxit}{Number of iterations}
+ \item{epsilon}{tolerance in matching population total. Either a single
+ number or a vector of the same length as \code{population}}
+ \item{verbose}{print lots of uninteresting information}
+ \item{force}{Return an answer even if the specified accuracy was not achieved}
+ \item{phase}{Phase of a two-phase design to calibrate (only
+ \code{phase=2} currently implemented.)}
+ \item{mm}{model matrix}
+ \item{ww}{vector of weights}
+ \item{eta}{starting values for iteration}
+}
+\details{
+
+ The \code{formula} argument specifies a model matrix, and the
+ \code{population} argument is the population column sums of this
+ matrix.
+
+ For the important special case where the calibration totals are (possibly
+ overlapping) marginal tables of factor variables, as in classical
+ raking, the \code{formula} and \code{population} arguments may be
+ lists in the same format as the input to \code{\link{rake}}.
+
+ If the \code{population} argument has a names attribute it will be
+ checked against the names produced by \code{model.matrix(formula)} and
+ reordered if necessary. This protects against situations where the
+ (locale-dependent) ordering of factor levels is not what you expected.
+
+ Numerical instabilities may result if the sampling weights in the
+ \code{design} object are wrong by multiple orders of magnitude. The
+ code now attempts to rescale the weights first, but it is better for
+ the user to ensure that the scale is reasonable.
+
+ The \code{calibrate} function implements linear, bounded linear,
+ raking, bounded raking, and logit calibration functions. All except
+ unbounded linear calibration use the Newton-Raphson algorithm
+ described by Deville et al (1993). This algorithm is exposed for other
+ uses in the \code{grake} function. Unbounded linear calibration uses
+ an algorithm that is less sensitive to collinearity. The calibration
+ function may be specified as a string naming one of the three built-in
+ functions or as an object of class \code{calfun}, allowing
+ user-defined functions. See \code{\link{make.calfun}} for details.
+
+ Calibration with bounds, or on highly collinear data, may fail. If
+ \code{force=TRUE} the approximately calibrated design object will
+ still be returned (useful for examining why it failed). A failure in
+ calibrating a set of replicate weights when the sampling weights were
+ successfully calibrated will give only a warning, not an error.
+
+ When calibration to the desired set of bounds is not possible, another option is
+ to trim weights. To do this set \code{bounds} to a looser set of bounds
+ for which calibration is achievable and set \code{trim} to the tighter
+ bounds. Weights outside the bounds will be trimmed to the bounds, and
+ the excess weight distributed over other observations in proportion to
+ their sampling weight (and so this may put some other observations
+ slightly over the trimming bounds). The projection matrix used in computing
+ standard errors is based on the feasible bounds specified by the
+ \code{bounds} argument. See also \code{\link{trimWeights}},
+ which trims the final weights in a design object rather than the
+ calibration adjustments.
+
+
+ For two-phase designs \code{calfun="rrz"} estimates the sampling
+ probabilities using logistic regression as described by Robins et al
+ (1994). \code{\link{estWeights}} will do the same thing.
+
+ Calibration may result in observations within the last-stage sampling
+ units having unequal weight even though they necessarily are sampled
+ together. Specifying \code{aggregate.stage} ensures that the
+ calibration weight adjustments are constant within sampling units at
+ the specified stage; if the original sampling weights were equal the
+ final weights will also be equal. The algorithm is as described by
+ Vanderhoeft (2001, section III.D). Specifying \code{aggregate.index}
+ does the same thing for replicate weight designs; a warning will be
+ given if the original weights are not constant within levels of
+ \code{aggregate.index}.
+
+ In a model with two-stage sampling, population totals may be available
+ for the PSUs actually sampled, but not for the whole population. In
+ this situation, calibrating within each PSU reduces with second-stage
+ contribution to variance. This generalizes to multistage sampling.
+ The \code{stage} argument specifies which stage of sampling the totals
+ refer to. Stage 0 is full population totals, stage 1 is totals for
+ PSUs, and so on. The default, \code{stage=NULL} is interpreted as
+ stage 0 when a single population vector is supplied and stage 1 when a
+ list is supplied. Calibrating to PSU totals will fail (with a message
+ about an exactly singular matrix) for PSUs that have fewer
+ observations than the number of calibration variables.
+
+ For unbounded linear calibration only, the variance in the calibration
+ model may depend on covariates. If \code{variance=NULL} the
+ calibration model has constant variance. If \code{variance} is not \code{NULL}
+ it specifies a linear combination of the columns of the model matrix
+ and the calibration variance is proportional to that linear
+ combination.
+
+ The design matrix specified by formula (after any aggregation) must be
+ of full rank, with one exception. If the population total for a column
+ is zero and all the observations are zero the column will be
+ ignored. This allows the use of factors where the population happens
+ to have no observations at some level.
+
+ In a two-phase design, \code{population} may be omitted when
+ \code{phase=2}, to specify calibration to the phase-one sample. If the
+ two-phase design object was constructed using the more memory-efficient
+ \code{method="approx"} argument to \code{\link{twophase}}, calibration of the first
+ phase of sampling to the population is not supported.
+
+
+}
+\value{
+ A survey design object.
+}
+
+\references{
+Deville J-C, Sarndal C-E, Sautory O (1993) Generalized Raking
+Procedures in Survey Sampling. JASA 88:1013-1020
+
+Kalton G, Flores-Cervantes I (2003) "Weighting methods" J Official
+ Stat 19(2) 81-97
+
+Lumley T, Shaw PA, Dai JY (2011) "Connections between survey calibration estimators and semiparametric models for incomplete data" International Statistical Review. 79:200-220. (with discussion 79:221-232)
+
+Sarndal C-E, Swensson B, Wretman J. "Model Assisted Survey
+Sampling". Springer. 1991.
+
+Rao JNK, Yung W, Hidiroglou MA (2002) Estimating equations for the
+analysis of survey data using poststratification information. Sankhya
+64 Series A Part 2, 364-378.
+
+Robins JM, Rotnitzky A, Zhao LP. (1994) Estimation of regression
+coefficients when some regressors are not always observed. Journal of
+the American Statistical Association, 89, 846-866.
+
+Vanderhoeft C (2001) Generalized Calibration at Statistics
+Belgium. Statistics Belgium Working Paper No 3.
+\url{http://statbel.fgov.be/nl/binaries/paper03\%5B1\%5D_tcm325-35412.pdf}
+}
+
+\seealso{ \code{\link{postStratify}}, \code{\link{rake}} for other ways
+ to use auxiliary information
+
+ \code{\link{twophase}} and \code{vignette("epi")} for an example of calibration in two-phase designs
+
+ \code{survey/tests/kalton.R} for examples replicating those in Kalton & Flores-Cervantes (2003)
+
+ \code{\link{make.calfun}} for user-defined calibration distances.
+
+ \code{\link{trimWeights}} to trim final weights rather than calibration adjustments.
+}
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
+
+## For a single factor variable this is equivalent to
+## postStratify
+
+(dclus1g<-calibrate(dclus1, ~stype, pop.totals))
+
+svymean(~api00, dclus1g)
+svytotal(~enroll, dclus1g)
+svytotal(~stype, dclus1g)
+
+## Make weights constant within school district
+(dclus1agg<-calibrate(dclus1, ~stype, pop.totals, aggregate.stage=1))
+svymean(~api00, dclus1agg)
+svytotal(~enroll, dclus1agg)
+svytotal(~stype, dclus1agg)
+
+
+## Now add sch.wide
+(dclus1g2 <- calibrate(dclus1, ~stype+sch.wide, c(pop.totals, sch.wideYes=5122)))
+
+svymean(~api00, dclus1g2)
+svytotal(~enroll, dclus1g2)
+svytotal(~stype, dclus1g2)
+
+## Finally, calibrate on 1999 API and school type
+
+(dclus1g3 <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069)))
+
+svymean(~api00, dclus1g3)
+svytotal(~enroll, dclus1g3)
+svytotal(~stype, dclus1g3)
+
+
+## Same syntax with replicate weights
+rclus1<-as.svrepdesign(dclus1)
+
+(rclus1g3 <- calibrate(rclus1, ~stype+api99, c(pop.totals, api99=3914069)))
+
+svymean(~api00, rclus1g3)
+svytotal(~enroll, rclus1g3)
+svytotal(~stype, rclus1g3)
+
+(rclus1agg3 <- calibrate(rclus1, ~stype+api99, c(pop.totals,api99=3914069), aggregate.index=~dnum))
+
+svymean(~api00, rclus1agg3)
+svytotal(~enroll, rclus1agg3)
+svytotal(~stype, rclus1agg3)
+
+
+###
+## Bounded weights
+range(weights(dclus1g3)/weights(dclus1))
+dclus1g3b <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069),bounds=c(0.6,1.6))
+range(weights(dclus1g3b)/weights(dclus1))
+
+svymean(~api00, dclus1g3b)
+svytotal(~enroll, dclus1g3b)
+svytotal(~stype, dclus1g3b)
+
+## trimming
+dclus1tr <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069),
+ bounds=c(0.5,2), trim=c(2/3,3/2))
+svymean(~api00+api99+enroll, dclus1tr)
+svytotal(~stype,dclus1tr)
+range(weights(dclus1tr)/weights(dclus1))
+
+rclus1tr <- calibrate(rclus1, ~stype+api99, c(pop.totals, api99=3914069),
+ bounds=c(0.5,2), trim=c(2/3,3/2))
+svymean(~api00+api99+enroll, rclus1tr)
+svytotal(~stype,rclus1tr)
+
+## Input in the same format as rake() for classical raking
+pop.table <- xtabs(~stype+sch.wide,apipop)
+pop.table2 <- xtabs(~stype+comp.imp,apipop)
+dclus1r<-rake(dclus1, list(~stype+sch.wide, ~stype+comp.imp),
+ list(pop.table, pop.table2))
+gclus1r<-calibrate(dclus1, formula=list(~stype+sch.wide, ~stype+comp.imp),
+ population=list(pop.table, pop.table2),calfun="raking")
+svymean(~api00+stype, dclus1r)
+svymean(~api00+stype, gclus1r)
+
+
+## generalised raking
+dclus1g3c <- calibrate(dclus1, ~stype+api99, c(pop.totals,
+ api99=3914069), calfun="raking")
+range(weights(dclus1g3c)/weights(dclus1))
+
+(dclus1g3d <- calibrate(dclus1, ~stype+api99, c(pop.totals,
+ api99=3914069), calfun=cal.logit, bounds=c(0.5,2.5)))
+range(weights(dclus1g3d)/weights(dclus1))
+
+
+
+## Ratio estimators are calibration estimators
+dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+svytotal(~api.stu,dstrat)
+
+common<-svyratio(~api.stu, ~enroll, dstrat, separate=FALSE)
+predict(common, total=3811472)
+
+pop<-3811472
+## equivalent to (common) ratio estimator
+dstratg1<-calibrate(dstrat,~enroll-1, pop, variance=1)
+svytotal(~api.stu, dstratg1)
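+
+## Low-level interface (a sketch, assuming grake() returns the vector
+## of calibration adjustments; cal.linear is the built-in linear calfun)
+mm<-model.matrix(~stype+api99, model.frame(~stype+api99, apiclus1))
+ww<-weights(dclus1)
+g<-grake(mm, ww, cal.linear, bounds=c(-Inf,Inf),
+  population=c(pop.totals, api99=3914069), epsilon=1e-7,
+  verbose=FALSE, maxit=50)
+range(g)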
+
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/compressWeights.Rd b/man/compressWeights.Rd
new file mode 100755
index 0000000..2b32dd0
--- /dev/null
+++ b/man/compressWeights.Rd
@@ -0,0 +1,45 @@
+\name{compressWeights}
+\alias{compressWeights}
+\alias{compressWeights.default}
+\alias{compressWeights.repweights_compressed}
+\alias{[.repweights_compressed}
+\alias{dim.repweights_compressed}
+\alias{dimnames.repweights_compressed}
+\alias{as.matrix.repweights_compressed}
+\alias{as.matrix.repweights}
+\alias{as.vector.repweights_compressed}
+\alias{compressWeights.svyrep.design}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Compress replicate weight matrix}
+\description{
+ Many replicate weight matrices have redundant rows, such as when
+ weights are the same for all observations in a PSU. This function
+ produces a compressed form. Methods for \code{as.matrix} and
+ \code{as.vector} extract and expand the weights.
+}
+\usage{
+compressWeights(rw, ...)
+\method{compressWeights}{svyrep.design}(rw,...)
+\method{as.matrix}{repweights_compressed}(x,...)
+\method{as.vector}{repweights_compressed}(x,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{rw}{A set of replicate weights or a \code{svyrep.design} object}
+ \item{x}{A compressed set of replicate weights}
+ \item{\dots}{For future expansion}
+}
+
+\value{
+ An object of class \code{repweights_compressed} or a
+ \code{svyrep.design} object with \code{repweights} element of class \code{repweights_compressed}
+}
+\seealso{\code{\link{jknweights}},\code{\link{as.svrepdesign}}}
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+rclus1c<-as.svrepdesign(dclus1,compress=TRUE)
+rclus1<-as.svrepdesign(dclus1,compress=FALSE)
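+## the compressed form expands to the same replicate-weight matrix
+## (a sketch using the as.matrix methods documented above)
+dim(as.matrix(rclus1$repweights))
+dim(as.matrix(rclus1c$repweights))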
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/confint.svyglm.Rd b/man/confint.svyglm.Rd
new file mode 100644
index 0000000..6747400
--- /dev/null
+++ b/man/confint.svyglm.Rd
@@ -0,0 +1,52 @@
+\name{confint.svyglm}
+\alias{confint.svyglm}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Confidence intervals for regression parameters }
+\description{
+Computes confidence intervals for regression parameters in
+\code{\link{svyglm}} objects. The default is a Wald-type confidence
+interval, adding and subtracting a multiple of the standard error. With
+\code{method="likelihood"} the interval is based on inverting the Rao-Scott
+likelihood ratio test. That is, it is an interval where the working
+model deviance is lower than the threshold for the Rao-Scott test at the
+specified level.
+}
+\usage{
+\method{confint}{svyglm}(object, parm, level = 0.95, method = c("Wald", "likelihood"), ddf = Inf, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{object}{\code{svyglm} object}
+ \item{parm}{numeric or character vector indicating which parameters to
+ construct intervals for.}
+ \item{level}{desired coverage}
+ \item{method}{See description above }
+ \item{ddf}{Denominator degrees of freedom for \code{"likelihood"}
+ method, to use a t distribution rather than a normal distribution. If \code{NULL},
+ use \code{object$df.residual}}
+ \item{\dots}{for future expansion}
+}
+
+\value{
+ A matrix of confidence intervals
+}
+\references{
+J. N. K. Rao and Alistair J. Scott (1984) On Chi-squared Tests For
+ Multiway Contingency Tables with Proportions Estimated From Survey
+ Data. Annals of Statistics 12:46-60
+}
+
+\seealso{\code{\link{confint}} }
+\examples{
+data(api)
+dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)
+
+m<-svyglm(I(comp.imp=="Yes")~stype*emer+ell, design=dclus2, family=quasibinomial)
+confint(m)
+confint(m, method="like",ddf=NULL, parm=c("ell","emer"))
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/crowd.Rd b/man/crowd.Rd
new file mode 100755
index 0000000..e3c228f
--- /dev/null
+++ b/man/crowd.Rd
@@ -0,0 +1,40 @@
+\name{crowd}
+\alias{crowd}
+\docType{data}
+\title{Household crowding}
+\description{
+A tiny dataset from the VPLX manual.
+}
+\usage{data(crowd)}
+\format{
+ A data frame with 6 observations on the following 5 variables.
+ \describe{
+ \item{rooms}{Number of rooms in the house}
+ \item{person}{Number of people in the household}
+ \item{weight}{Sampling weight}
+ \item{cluster}{Cluster number}
+ \item{stratum}{Stratum number}
+ }
+}
+\source{
+ Manual for VPLX, Census Bureau.
+}
+\examples{
+data(crowd)
+
+## Example 1-1
+i1.1<-as.svrepdesign(svydesign(id=~cluster, weight=~weight,data=crowd))
+i1.1<-update(i1.1, room.ratio=rooms/person,
+overcrowded=factor(person>rooms))
+svymean(~rooms+person+room.ratio,i1.1)
+svytotal(~rooms+person+room.ratio,i1.1)
+svymean(~rooms+person+room.ratio,subset(i1.1,overcrowded==TRUE))
+svytotal(~rooms+person+room.ratio,subset(i1.1,overcrowded==TRUE))
+
+## Example 1-2
+i1.2<-as.svrepdesign(svydesign(id=~cluster,weight=~weight,strata=~stratum, data=crowd))
+svymean(~rooms+person,i1.2)
+svytotal(~rooms+person,i1.2)
+
+}
+\keyword{datasets}
diff --git a/man/dimnames.DBIsvydesign.Rd b/man/dimnames.DBIsvydesign.Rd
new file mode 100644
index 0000000..95a9cf9
--- /dev/null
+++ b/man/dimnames.DBIsvydesign.Rd
@@ -0,0 +1,54 @@
+\name{dimnames.DBIsvydesign}
+\alias{dimnames.DBIsvydesign}
+\alias{dimnames.ODBCsvydesign}
+\alias{dimnames.survey.design}
+\alias{dimnames.svyrep.design}
+\alias{dimnames.twophase}
+\alias{dimnames.svyimputationList}
+\alias{dim.DBIsvydesign}
+\alias{dim.ODBCsvydesign}
+\alias{dim.survey.design}
+\alias{dim.twophase}
+\alias{dim.svyimputationList}
+\alias{dim.svyrep.design}
+
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Dimensions of survey designs}
+\description{
+\code{dimnames} returns variable names and row names for the data
+variables in a design object and \code{dim} returns dimensions.
+For multiple imputation designs there is a third dimension giving the
+number of imputations. For database-backed designs the second dimension
+includes variables defined by \code{update}. The first dimension
+excludes observations with zero weight.
+}
+\usage{
+\method{dim}{survey.design}(x)
+\method{dim}{svyimputationList}(x)
+\method{dimnames}{survey.design}(x)
+\method{dimnames}{DBIsvydesign}(x)
+\method{dimnames}{ODBCsvydesign}(x)
+\method{dimnames}{svyimputationList}(x)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{x}{Design object}
+
+}
+
+\value{
+A vector of numbers for \code{dim}, a list of vectors of strings for \code{dimnames}.
+}
+
+\seealso{ \code{\link{update.DBIsvydesign}}, \code{\link{with.svyimputationList}}}
+\examples{
+data(api)
+dclus1 <- svydesign(ids=~dnum,weights=~pw,data=apiclus1,fpc=~fpc)
+dim(dclus1)
+dimnames(dclus1)
+colnames(dclus1)
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/election.Rd b/man/election.Rd
new file mode 100644
index 0000000..fa68f5d
--- /dev/null
+++ b/man/election.Rd
@@ -0,0 +1,72 @@
+\name{election}
+\alias{election}
+\alias{election_pps}
+\alias{election_jointprob}
+\alias{election_jointHR}
+\alias{election_insample}
+\docType{data}
+\title{US 2004 presidential election data at state or county level}
+\description{
+A sample of voting data from US states or counties (depending on data
+availability), sampled with probability proportional to number of votes. The sample was drawn using Tille's splitting method, implemented in the "sampling" package.
+}
+\usage{data(election)}
+\format{
+ \code{election} is a data frame with 4600 observations on the following 8 variables.
+ \describe{
+ \item{\code{County}}{A factor specifying the state or county}
+ \item{\code{TotPrecincts}}{Number of precincts in the state or county}
+ \item{\code{PrecinctsReporting}}{Number of precincts supplying data}
+ \item{\code{Bush}}{Votes for George W. Bush}
+ \item{\code{Kerry}}{Votes for John Kerry}
+ \item{\code{Nader}}{Votes for Ralph Nader}
+ \item{\code{votes}}{Total votes for those three candidates}
+ \item{\code{p}}{Sampling probability, proportional to \code{votes}}
+ }
+
+ \code{election_pps} is a sample of 40 counties or states taken with
+ probability proportional to the number of votes. It includes the
+ additional column \code{wt} with the sampling weights.
+
+ \code{election_insample} indicates which rows of \code{election} were sampled.
+
+ \code{election_jointprob} are the pairwise sampling probabilities and
+ \code{election_jointHR} are approximate pairwise sampling probabilities using
+ the Hartley-Rao approximation.
+}
+\source{
+.
+}
+
+\examples{
+data(election)
+## high positive correlation between totals
+plot(Bush~Kerry,data=election,log="xy")
+## high negative correlation between proportions
+plot(I(Bush/votes)~I(Kerry/votes), data=election)
+
+## Variances without replacement
+## Horvitz-Thompson type
+dpps_br<- svydesign(id=~1, fpc=~p, data=election_pps, pps="brewer")
+dpps_ov<- svydesign(id=~1, fpc=~p, data=election_pps, pps="overton")
+dpps_hr<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40))
+dpps_hr1<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR())
+dpps_ht<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob))
+## Yates-Grundy type
+dpps_yg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob),variance="YG")
+dpps_hryg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40),variance="YG")
+
+## The with-replacement approximation
+dppswr <-svydesign(id=~1, probs=~p, data=election_pps)
+
+svytotal(~Bush+Kerry+Nader, dpps_ht)
+svytotal(~Bush+Kerry+Nader, dpps_yg)
+svytotal(~Bush+Kerry+Nader, dpps_hr)
+svytotal(~Bush+Kerry+Nader, dpps_hryg)
+svytotal(~Bush+Kerry+Nader, dpps_hr1)
+svytotal(~Bush+Kerry+Nader, dpps_br)
+svytotal(~Bush+Kerry+Nader, dpps_ov)
+svytotal(~Bush+Kerry+Nader, dppswr)
+}
+\keyword{datasets}
+\keyword{survey}
diff --git a/man/estweights.Rd b/man/estweights.Rd
new file mode 100644
index 0000000..d26d286
--- /dev/null
+++ b/man/estweights.Rd
@@ -0,0 +1,86 @@
+\name{estweights}
+\alias{estWeights}
+\alias{estWeights.twophase}
+\alias{estWeights.data.frame}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Estimated weights for missing data}
+\description{
+Creates or adjusts a two-phase survey design object using a logistic
+regression model for second-phase sampling probability. This function
+should be particularly useful in reweighting to account for missing data. }
+\usage{
+estWeights(data,formula,...)
+\method{estWeights}{twophase}(data,formula=NULL, working.model=NULL,...)
+\method{estWeights}{data.frame}(data,formula=NULL, working.model=NULL,
+ subset=NULL, strata=NULL,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{data}{twophase design object or data frame}
+ \item{formula}{Predictors for estimating weights}
+ \item{working.model}{Model fitted to complete (ie phase 1) data}
+ \item{subset}{Subset of data frame with complete data (ie phase 1).
+ If \code{NULL} use all complete cases}
+ \item{strata}{Stratification (if any) of phase 2 sampling}
+ \item{\dots}{for future expansion}
+}
+\details{
+ If \code{data} is a data frame, \code{estWeights} first creates a
+ two-phase design object. The \code{strata} argument is used only to
+ compute finite population corrections; the same variables must be
+ included in \code{formula} to compute stratified sampling probabilities.
+
+ With a two-phase design object, \code{estWeights} estimates the sampling
+ probabilities using logistic regression as described by Robins et al
+ (1994) and adds information to the object to enable correct sandwich
+ standard errors to be computed.
+
+ An alternative to specifying \code{formula} is to specify
+ \code{working.model}. The estimating functions from this model will be
+ used as predictors of the sampling probabilities, which will increase
+ efficiency to the extent that the working model and the model of
+ interest estimate the same parameters (Kulich \& Lin 2004).
+
+ The effect on a two-phase design object is very similar to
+ \code{\link{calibrate}}, and is identical when \code{formula}
+ specifies a saturated model.
+}
+\value{
+ A two-phase survey design object.
+}
+
+\references{
+Breslow NE, Lumley T, Ballantyne CM, Chambless LE, Kulich M. (2009) Using the Whole Cohort in the Analysis of Case-Cohort Data. Am J Epidemiol. 2009 Jun 1;169(11):1398-405.
+
+Robins JM, Rotnitzky A, Zhao LP. (1994) Estimation of regression
+coefficients when some regressors are not always observed. Journal of
+the American Statistical Association, 89, 846-866.
+
+Kulich M, Lin DY (2004). Improving the Efficiency of Relative-Risk
+Estimation in Case-Cohort Studies. Journal of the American Statistical Association, Vol. 99, pp.832-844
+
+Lumley T, Shaw PA, Dai JY (2011) "Connections between survey calibration estimators and semiparametric models for incomplete data" International Statistical Review. 79:200-220. (with discussion 79:221-232)
+
+}
+
+\seealso{ \code{\link{postStratify}},
+ \code{\link{calibrate}}, \code{\link{twophase}}}
+\examples{
+data(airquality)
+
+## ignoring missingness, using model-based standard error
+summary(lm(log(Ozone)~Temp+Wind, data=airquality))
+
+## Without covariates to predict missingness we get
+## same point estimates, but different (sandwich) standard errors
+daq<-estWeights(airquality, formula=~1,subset=~I(!is.na(Ozone)))
+summary(svyglm(log(Ozone)~Temp+Wind,design=daq))
+
+## Reweighting based on weather, month
+d2aq<-estWeights(airquality, formula=~Temp+Wind+Month,
+ subset=~I(!is.na(Ozone)))
+summary(svyglm(log(Ozone)~Temp+Wind,design=d2aq))
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/fpc.Rd b/man/fpc.Rd
new file mode 100755
index 0000000..4d7a037
--- /dev/null
+++ b/man/fpc.Rd
@@ -0,0 +1,72 @@
+\name{fpc}
+\alias{fpc}
+\non_function{}
+\title{Small survey example}
+\usage{data(fpc)}
+\description{
+The \code{fpc} data frame has 8 rows and 6 columns. It is artificial
+data to illustrate survey sampling estimators.
+}
+\format{
+ This data frame contains the following columns:
+ \describe{
+ \item{stratid}{Stratum ids}
+ \item{psuid}{Sampling unit ids}
+ \item{weight}{Sampling weights}
+ \item{nh}{number sampled per stratum}
+ \item{Nh}{population size per stratum}
+ \item{x}{data}
+ }
+}
+
+\source{
+\url{http://www.stata-press.com/data/r7/fpc.dta}
+}
+
+\examples{
+data(fpc)
+fpc
+
+
+withoutfpc<-svydesign(weights=~weight, ids=~psuid, strata=~stratid, variables=~x,
+ data=fpc, nest=TRUE)
+
+withoutfpc
+svymean(~x, withoutfpc)
+
+withfpc<-svydesign(weights=~weight, ids=~psuid, strata=~stratid,
+fpc=~Nh, variables=~x, data=fpc, nest=TRUE)
+
+withfpc
+svymean(~x, withfpc)
+
+## Other equivalent forms
+withfpc<-svydesign(prob=~I(1/weight), ids=~psuid, strata=~stratid,
+fpc=~Nh, variables=~x, data=fpc, nest=TRUE)
+
+svymean(~x, withfpc)
+
+withfpc<-svydesign(weights=~weight, ids=~psuid, strata=~stratid,
+fpc=~I(nh/Nh), variables=~x, data=fpc, nest=TRUE)
+
+svymean(~x, withfpc)
+
+withfpc<-svydesign(weights=~weight, ids=~interaction(stratid,psuid),
+strata=~stratid, fpc=~I(nh/Nh), variables=~x, data=fpc)
+
+svymean(~x, withfpc)
+
+withfpc<-svydesign(ids=~psuid, strata=~stratid, fpc=~Nh,
+ variables=~x,data=fpc,nest=TRUE)
+
+svymean(~x, withfpc)
+
+withfpc<-svydesign(ids=~psuid, strata=~stratid,
+fpc=~I(nh/Nh), variables=~x, data=fpc, nest=TRUE)
+
+svymean(~x, withfpc)
+
+
+
+}
+\keyword{datasets}
diff --git a/man/ftable.svystat.Rd b/man/ftable.svystat.Rd
new file mode 100755
index 0000000..bde85dc
--- /dev/null
+++ b/man/ftable.svystat.Rd
@@ -0,0 +1,57 @@
+\name{ftable.svystat}
+\alias{ftable.svystat}
+\alias{ftable.svrepstat}
+\alias{ftable.svyby}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Lay out tables of survey statistics}
+\description{
+Reformat the output of survey computations to a table.
+}
+\usage{
+\method{ftable}{svystat}(x, rownames,...)
+\method{ftable}{svrepstat}(x, rownames,...)
+\method{ftable}{svyby}(x,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{x}{Output of functions such as \code{svymean}, \code{svrepmean}, \code{svyby}}
+ \item{rownames}{List of vectors of strings giving dimension names for
+ the resulting table (see examples)}
+ \item{...}{Arguments for future expansion}
+}
+\value{
+ An object of class \code{"ftable"}
+}
+
+\seealso{ \code{\link{ftable}}}
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+a<-svymean(~interaction(stype,comp.imp), design=dclus1)
+b<-ftable(a, rownames=list(stype=c("E","H","M"),comp.imp=c("No","Yes")))
+b
+
+a<-svymean(~interaction(stype,comp.imp), design=dclus1, deff=TRUE)
+b<-ftable(a, rownames=list(stype=c("E","H","M"),comp.imp=c("No","Yes")))
+round(100*b,1)
+
+rclus1<-as.svrepdesign(dclus1)
+a<-svytotal(~interaction(stype,comp.imp), design=rclus1)
+b<-ftable(a, rownames=list(stype=c("E","H","M"),comp.imp=c("No","Yes")))
+b
+round(b)
+
+a<-svyby(~api99 + api00, ~stype + sch.wide, rclus1, svymean, keep.var=TRUE)
+ftable(a)
+print(ftable(a),digits=2)
+
+b<-svyby(~api99 + api00, ~stype + sch.wide, rclus1, svymean, keep.var=TRUE, deff=TRUE)
+print(ftable(b),digits=2)
+
+d<-svyby(~api99 + api00, ~stype + sch.wide, rclus1, svymean, keep.var=TRUE, vartype=c("se","cvpct"))
+round(ftable(d),1)
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/hadamard.Rd b/man/hadamard.Rd
new file mode 100755
index 0000000..642c992
--- /dev/null
+++ b/man/hadamard.Rd
@@ -0,0 +1,67 @@
+\name{hadamard}
+\alias{hadamard}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Hadamard matrices }
+\description{
+ Returns a Hadamard matrix of dimension larger than the argument.
+}
+\usage{
+hadamard(n)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{n}{lower bound for size }
+}
+
+\value{
+A Hadamard matrix
+}
+\details{
+ For most \code{n} the matrix comes from \code{\link{paley}}. The
+ \eqn{36\times 36}{36x36} matrix is from Plackett and Burman (1946)
+ and the \eqn{28\times 28}{28x28} is from Sloane's library of Hadamard
+ matrices.
+
+ Hadamard matrices are conjectured to exist for every dimension that
+ is a multiple of 4, but this function does not know constructions
+ for all of them, so it will sometimes return matrices that are
+ larger than necessary. The excess is at most 4 for \eqn{n<180}{n<180}
+ and at most 5\% for \eqn{n>100}{n>100}.
+
+}
+\note{Strictly speaking, a Hadamard matrix has entries +1 and -1 rather
+ than 1 and 0, so \code{2*hadamard(n)-1} is a Hadamard matrix.}
+\references{
+ Sloane NJA. A Library of Hadamard Matrices \url{http://neilsloane.com/hadamard/}
+
+
+Plackett RL, Burman JP. (1946) The Design of Optimum Multifactorial Experiments
+Biometrika, Vol. 33, No. 4 pp. 305-325
+
+ Cameron PJ (2005) Hadamard Matrices
+\url{http://designtheory.org/library/encyc/topics/had.pdf}. In: The
+Encyclopedia of Design Theory \url{http://designtheory.org/library/encyc/}
+ }
+\seealso{\code{\link{brrweights}}, \code{\link{paley}}}
+\examples{
+
+par(mfrow=c(2,2))
+## Sylvester-type
+image(hadamard(63),main=quote("Sylvester: "*64==2^6))
+## Paley-type
+image(hadamard(59),main=quote("Paley: "*60==59+1))
+## from NJ Sloane's library
+image(hadamard(27),main=quote("Stored: "*28))
+## For n=90 we get 96 rather than the minimum possible size, 92.
+image(hadamard(90),main=quote("Constructed: "*96==2^3\%*\%(11+1)))
+
+par(mfrow=c(1,1))
+plot(2:150,sapply(2:150,function(i) ncol(hadamard(i))),type="S",
+ ylab="Matrix size",xlab="n",xlim=c(1,150),ylim=c(1,150))
+abline(0,1,lty=3)
+lines(2:150, 2:150-(2:150 \%\% 4)+4,col="purple",type="S",lty=2)
+legend(c(x=10,y=140),legend=c("Actual size","Minimum possible size"),
+ col=c("black","purple"),bty="n",lty=c(1,2))
+
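+
+## A quick sketch verifying the +/-1 version from the note above:
+## for a Hadamard matrix H of order k, H t(H) = k I
+H<-2*hadamard(11)-1
+all(H\%*\%t(H)==ncol(H)*diag(ncol(H)))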
+}
+\keyword{survey}
+
diff --git a/man/hospital.Rd b/man/hospital.Rd
new file mode 100755
index 0000000..657ecf5
--- /dev/null
+++ b/man/hospital.Rd
@@ -0,0 +1,36 @@
+\name{hospital}
+\alias{hospital}
+\non_function{}
+\title{Sample of obstetric hospitals }
+\usage{data(hospital)}
+\description{
+The \code{hospital} data frame has 15 rows and 5 columns.
+}
+\format{
+ This data frame contains the following columns:
+ \describe{
+ \item{hospno}{Hospital id}
+ \item{oblevel}{level of obstetric care}
+ \item{weighta}{Weights, as given by the original reference}
+ \item{tothosp}{total hospitalisations}
+ \item{births}{births}
+ \item{weightats}{Weights, as given in the source}
+ }
+}
+\source{
+\url{http://www.ats.ucla.edu/stat/books/sop/hospsamp.dta}
+}
+\references{
+Levy and Lemeshow. "Sampling of Populations" (3rd edition). Wiley.
+}
+\examples{
+data(hospital)
+hospdes<-svydesign(strata=~oblevel, id=~hospno, weights=~weighta,
+fpc=~tothosp, data=hospital)
+hosprep<-as.svrepdesign(hospdes)
+
+svytotal(~births, design=hospdes)
+svytotal(~births, design=hosprep)
+
+}
+\keyword{datasets}
diff --git a/man/make.calfun.Rd b/man/make.calfun.Rd
new file mode 100644
index 0000000..203f4d5
--- /dev/null
+++ b/man/make.calfun.Rd
@@ -0,0 +1,65 @@
+\name{make.calfun}
+\alias{make.calfun}
+\alias{cal.linear}
+\alias{cal.raking}
+\alias{cal.logit}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Calibration metrics}
+\description{
+Create calibration metric for use in \code{\link{calibrate}}. The
+function \code{F} is the link function described in section 2 of
+Deville et al (1993). To create a new calibration metric, specify \eqn{F-1}{F-1} and its
+derivative. The package provides \code{cal.linear}, \code{cal.raking},
+and \code{cal.logit}.
+}
+\usage{
+make.calfun(Fm1, dF, name)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{Fm1}{Function \eqn{F-1}{F-1} taking a vector \code{u} and a
+ vector of length 2, \code{bounds}.}
+ \item{dF}{Derivative of \code{Fm1} wrt \code{u}: arguments \code{u}
+ and \code{bounds} }
+ \item{name}{Character string to use as name }
+}
+\value{
+An object of class \code{"calfun"}
+}
+
+\references{
+Deville J-C, Sarndal C-E, Sautory O (1993) Generalized Raking
+Procedures in Survey Sampling. JASA 88:1013-1020
+
+Deville J-C, Sarndal C-E (1992) Calibration Estimators in Survey
+Sampling. JASA 87: 376-382
+ }
+
+\seealso{\code{\link{calibrate}} }
+\examples{
+str(cal.linear)
+cal.linear$Fm1
+cal.linear$dF
+
+hellinger <- make.calfun(Fm1=function(u, bounds) ((1-u/2)^-2)-1,
+ dF= function(u, bounds) (1-u/2)^-3 ,
+ name="hellinger distance")
+
+hellinger
+
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+svymean(~api00,calibrate(dclus1, ~api99, pop=c(6194, 3914069),
+ calfun=hellinger))
+
+svymean(~api00,calibrate(dclus1, ~api99, pop=c(6194, 3914069),
+ calfun=cal.linear))
+
+svymean(~api00,calibrate(dclus1, ~api99, pop=c(6194,3914069),
+ calfun=cal.raking))
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/marginpred.Rd b/man/marginpred.Rd
new file mode 100644
index 0000000..8bd8b9d
--- /dev/null
+++ b/man/marginpred.Rd
@@ -0,0 +1,89 @@
+\name{marginpred}
+\Rdversion{1.1}
+\alias{marginpred}
+\alias{marginpred.svycoxph}
+\alias{marginpred.svykmlist}
+\alias{marginpred.svyglm}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Standardised predictions (predictive margins) for regression models.
+}
+\description{
+Reweights the design (using \code{\link{calibrate}}) so that the adjustment variables are uncorrelated
+with the variables in the model, and then performs predictions by
+calling \code{predict}. When the adjustment model is saturated this is
+equivalent to direct standardization on the adjustment variables.
+
+The \code{svycoxph} and \code{svykmlist} methods return survival curves.
+}
+\usage{
+marginpred(model, adjustfor, predictat, ...)
+\method{marginpred}{svycoxph}(model, adjustfor, predictat, se=FALSE, ...)
+\method{marginpred}{svykmlist}(model, adjustfor, predictat, se=FALSE, ...)
+\method{marginpred}{svyglm}(model, adjustfor, predictat, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{model}{
+ A regression model object of a class that has a \code{marginpred} method
+ }
+ \item{adjustfor}{
+ Model formula specifying adjustment variables, which must be in the
+ design object of the model
+ }
+ \item{predictat}{
+ A data frame giving values of the variables in \code{model} to
+ predict at}
+ \item{se}{Estimate standard errors for the survival curve (uses a lot
+ of memory if the sample size is large)}
+ \item{\dots}{Extra arguments, passed to the \code{predict} method for \code{model}}
+}
+
+\seealso{
+ \code{\link{svypredmeans}} for the method of Graubard and Korn implemented in SUDAAN.
+
+ \code{\link{calibrate}}
+
+ \code{\link{predict.svycoxph}}
+}
+\examples{
+## generate data with apparent group effect from confounding
+set.seed(42)
+df<-data.frame(x=rnorm(100))
+df$time<-rexp(100)*exp(df$x-1)
+df$status<-1
+df$group<-(df$x+rnorm(100))>0
+des<-svydesign(id=~1,data=df)
+newdf<-data.frame(group=c(FALSE,TRUE), x=c(0,0))
+
+## Cox model
+m0<-svycoxph(Surv(time,status)~group,design=des)
+m1<-svycoxph(Surv(time,status)~group+x,design=des)
+## conditional predictions, unadjusted and adjusted
+cpred0<-predict(m0, type="curve", newdata=newdf, se=TRUE)
+cpred1<-predict(m1, type="curve", newdata=newdf, se=TRUE)
+## adjusted marginal prediction
+mpred<-marginpred(m0, adjustfor=~x, predictat=newdf, se=TRUE)
+
+plot(cpred0)
+lines(cpred1[[1]],col="red")
+lines(cpred1[[2]],col="red")
+lines(mpred[[1]],col="blue")
+lines(mpred[[2]],col="blue")
+
+## Kaplan--Meier
+s2<-svykm(Surv(time,status>0)~group, design=des)
+p2<-marginpred(s2, adjustfor=~x, predictat=newdf,se=TRUE)
+plot(s2)
+lines(p2[[1]],col="green")
+lines(p2[[2]],col="green")
+
+## logistic regression
+logisticm <- svyglm(group~time, family=quasibinomial, design=des)
+newdf$time<-c(0.1,0.8)
+logisticpred <- marginpred(logisticm, adjustfor=~x, predictat=newdf)
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/mu284.Rd b/man/mu284.Rd
new file mode 100644
index 0000000..1e2d5ad
--- /dev/null
+++ b/man/mu284.Rd
@@ -0,0 +1,34 @@
+\name{mu284}
+\alias{mu284}
+\docType{data}
+\title{Two-stage sample from MU284}
+\description{
+The MU284 population comes from Sarndal et al, and the complete data are
+available from Statlib. These data are a two-stage sample from the
+population, analyzed on page 143 of the book.
+}
+\usage{data(mu284)}
+\format{
+ A data frame with 15 observations on the following 5 variables.
+ \describe{
+ \item{\code{id1}}{identifier for PSU}
+ \item{\code{n1}}{number of PSUs in population}
+ \item{\code{id2}}{identifier for second-stage unit}
+ \item{\code{y1}}{variable to be analysed}
+ \item{\code{n2}}{number of second-stage units in this PSU}
+ }
+}
+\source{
+ Carl Erik Sarndal, Bengt Swensson, Jan Wretman. (1991) "Model Assisted
+ Survey Sampling" Springer.
+}
+\references{
+Full MU284 population at \url{http://lib.stat.cmu.edu/datasets/mu284}
+}
+\examples{
+data(mu284)
+(dmu284<-svydesign(id=~id1+id2,fpc=~n1+n2, data=mu284))
+(ytotal<-svytotal(~y1, dmu284))
+vcov(ytotal)
+}
+\keyword{datasets}
diff --git a/man/nhanes.Rd b/man/nhanes.Rd
new file mode 100644
index 0000000..0300d7e
--- /dev/null
+++ b/man/nhanes.Rd
@@ -0,0 +1,33 @@
+\name{nhanes}
+\alias{nhanes}
+\docType{data}
+\title{
+Cholesterol data from a US survey
+}
+\description{
+Data extracted from NHANES 2009-2010 on high cholesterol.
+}
+\usage{data(nhanes)}
+\format{
+ A data frame with 8591 observations on the following 7 variables.
+ \describe{
+ \item{\code{SDMVPSU}}{Primary sampling units}
+ \item{\code{SDMVSTRA}}{Sampling strata}
+ \item{\code{WTMEC2YR}}{Sampling weights}
+ \item{\code{HI_CHOL}}{Numeric vector: 1 for total cholesterol over
+ 240mg/dl, 0 under 240mg/dl}
+ \item{\code{race}}{1=Hispanic, 2=non-Hispanic white, 3=non-Hispanic
+ black, 4=other}
+ \item{\code{agecat}}{Age group \code{(0,19]} \code{(19,39]} \code{(39,59]} \code{(59,Inf]}}
+ \item{\code{RIAGENDR}}{Gender: 1=male, 2=female}
+ }
+}
+\source{
+ \url{ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/nhanes/2009-2010}
+}
+\examples{
+data(nhanes)
+design <- svydesign(id=~SDMVPSU, strata=~SDMVSTRA, weights=~WTMEC2YR, nest=TRUE,data=nhanes)
+design
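+
+## A sketch of typical use (HI_CHOL has missing values, hence na.rm=TRUE)
+svymean(~HI_CHOL, design, na.rm=TRUE)
+svyby(~HI_CHOL, ~race, design, svymean, na.rm=TRUE)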
+}
+\keyword{datasets}
diff --git a/man/nonresponse.Rd b/man/nonresponse.Rd
new file mode 100755
index 0000000..50103bf
--- /dev/null
+++ b/man/nonresponse.Rd
@@ -0,0 +1,95 @@
+\name{nonresponse}
+\alias{nonresponse}
+\alias{sparseCells}
+\alias{neighbours}
+\alias{joinCells}
+\alias{weights.nonresponse}
+\alias{print.nonresponse}
+\alias{print.nonresponseSubset}
+\alias{[.nonresponse}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Experimental: Construct non-response weights}
+\description{
+ Functions to simplify the construction of non-response weights by
+ combining strata that have small sample counts or large weights.
+}
+\usage{
+nonresponse(sample.weights, sample.counts, population)
+sparseCells(object, count=0,totalweight=Inf, nrweight=1.5)
+neighbours(index,object)
+joinCells(object,a,...)
+\method{weights}{nonresponse}(object,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{sample.weights}{table of sampling weight by stratifying variables}
+ \item{sample.counts}{table of sample counts by stratifying variables}
+ \item{population}{table of population size by stratifying variables}
+ \item{object}{object of class \code{"nonresponse"}}
+ \item{count}{Cells with fewer sampled units than this are "sparse"}
+ \item{nrweight}{Cells with higher non-response weight than this are "sparse"}
+ \item{totalweight}{Cells with average sampling weight times
+ non-response weight higher than this are "sparse"}
+ \item{index}{Number of a cell whose neighbours are to be found}
+ \item{a,...}{Cells to join}
+}
+\details{
+
+ When a stratified survey is conducted with imperfect response it is
+ desirable to rescale the sampling weights to reflect the nonresponse.
+ If some strata have small sample size, high non-response, or already
+ had high sampling weights it may be desirable to get less variable
+ non-response weights by averaging non-response across strata.
+ Suitable strata to collapse may be similar on the stratifying
+ variables and/or on the level of non-response.
+
+ \code{nonresponse()} combines stratified tables of population size,
+ sample size, and sample weight into an object. \code{sparseCells}
+ identifies cells that may need combining. \code{neighbours} describes the
+ cells adjacent to a specified cell, and \code{joinCells} collapses
+ the specified cells. When the collapsing is complete, use
+ \code{weights()} to extract the nonresponse weights.
+
+
+}
+\value{
+ \code{nonresponse} and \code{joinCells} return objects of class \code{"nonresponse"},
+ \code{neighbours} and \code{sparseCells} return objects of class \code{"nonresponseSubset"}
+}
+
+\examples{
+data(api)
+## pretend the sampling was stratified on three variables
+poptable<-xtabs(~sch.wide+comp.imp+stype,data=apipop)
+sample.count<-xtabs(~sch.wide+comp.imp+stype,data=apiclus1)
+sample.weight<-xtabs(pw~sch.wide+comp.imp+stype, data=apiclus1)
+
+## create a nonresponse object
+nr<-nonresponse(sample.weight,sample.count, poptable)
+
+## sparse cells
+sparseCells(nr)
+
+## Look at neighbours
+neighbours(3,nr)
+neighbours(11,nr)
+
+## Collapse some contiguous cells
+nr1<-joinCells(nr,3,5,7)
+
+## sparse cells now
+sparseCells(nr1)
+nr2<-joinCells(nr1,3,11,8)
+
+nr2
+
+## one relatively sparse cell
+sparseCells(nr2)
+## but nothing suitable to join it to
+neighbours(3,nr2)
+
+## extract the weights
+weights(nr2)
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/open.DBIsvydesign.Rd b/man/open.DBIsvydesign.Rd
new file mode 100644
index 0000000..f50f9c6
--- /dev/null
+++ b/man/open.DBIsvydesign.Rd
@@ -0,0 +1,55 @@
+\name{open.DBIsvydesign}
+\alias{open.DBIsvydesign}
+\alias{close.DBIsvydesign}
+\alias{open.ODBCsvydesign}
+\alias{close.ODBCsvydesign}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Open and close DBI connections }
+\description{
+ A database-backed survey design object contains a connection to a
+database. This connection will be broken if the object is saved and
+reloaded, and the connection should ideally be closed with \code{close}
+before quitting R (although it doesn't matter for SQLite
+connections). The connection can be reopened with \code{open}.
+}
+\usage{
+\method{open}{DBIsvydesign}(con, ...)
+\method{close}{DBIsvydesign}(con, ...)
+\method{open}{ODBCsvydesign}(con, ...)
+\method{close}{ODBCsvydesign}(con, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{con}{Object of class \code{DBIsvydesign} or \code{ODBCsvydesign}}
+ \item{\dots}{Other options, to be passed to \code{dbConnect} or
+ \code{dbDisconnect}, or \code{odbcReConnect} or
+ \code{odbcDisconnect} }
+}
+\value{
+The same survey design object with the connection opened or closed.
+}
+
+\seealso{\code{\link{svydesign}}
+
+ DBI package }
+\examples{
+\dontrun{
+library(RSQLite)
+dbclus1<-svydesign(id=~dnum, weights=~pw, fpc=~fpc,
+data="apiclus1",dbtype="SQLite",
+dbname=system.file("api.db",package="survey"))
+
+dbclus1
+close(dbclus1)
+dbclus1
+try(svymean(~api00, dbclus1))
+
+dbclus1<-open(dbclus1)
+open(dbclus1)
+svymean(~api00, dbclus1)
+}
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/paley.Rd b/man/paley.Rd
new file mode 100644
index 0000000..c7496bc
--- /dev/null
+++ b/man/paley.Rd
@@ -0,0 +1,72 @@
+\name{paley}
+\alias{paley}
+\alias{is.hadamard}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Paley-type Hadamard matrices}
+\description{
+ Computes a Hadamard matrix of dimension \eqn{(p+1)\times 2^k}{(p+1)*2^k}, where p is a prime,
+ and p+1 is a multiple of 4, using the Paley construction. Used by \code{\link{hadamard}}.
+}
+\usage{
+paley(n, nmax = 2 * n, prime=NULL, check=!is.null(prime))
+
+is.hadamard(H, style=c("0/1","+-"), full.orthogonal.balance=TRUE)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{n}{Minimum size for matrix}
+ \item{nmax}{Maximum size for matrix. Ignored if \code{prime} is specified.}
+ \item{prime}{Optional. A prime at least as large as
+ \code{n}, such that \code{prime+1} is divisible by 4.}
+ \item{check}{Check that the resulting matrix is of Hadamard type}
+ \item{H}{Matrix}
+ \item{style}{\code{"0/1"} for a matrix of 0s and 1s, \code{"+-"} for a
+ matrix of \eqn{\pm 1}{+/-1}.}
+ \item{full.orthogonal.balance}{Require full orthogonal balance?}
+}
+
+\value{
+ For \code{paley}, a matrix of zeros and ones, or \code{NULL} if no matrix smaller than
+ \code{nmax} can be found.
+
+ For \code{is.hadamard}, \code{TRUE} if \code{H} is a Hadamard matrix.
+}
+\details{
+ The Paley construction gives a Hadamard matrix of order p+1 if p is
+ prime and p+1 is a multiple of 4. This is then expanded to order
+ \eqn{(p+1)\times 2^k}{(p+1)*2^k} using the Sylvester construction.
+
+ \code{paley} knows primes up to 7919. The user can specify a prime
+ with the \code{prime} argument, in which case a matrix of order
+ \eqn{p+1}{p+1} is constructed.
+
+ If \code{check=TRUE} the code uses \code{is.hadamard} to check that
+ the resulting matrix really is of Hadamard type, in the same way as in
+ the example below. As this test takes \eqn{n^3}{n^3} time it is
+ preferable to just be sure that \code{prime} really is prime.
+
+ A Hadamard matrix including a row of 1s gives BRR designs where the
+ average of the replicates for a linear statistic is exactly the full
+ sample estimate. This property is called full orthogonal balance.
+}
+\references{
+Cameron PJ (2005) Hadamard Matrices.
+\url{http://designtheory.org/library/encyc/topics/had.pdf}. In: The
+Encyclopedia of Design Theory \url{http://designtheory.org/library/encyc/}
+
+}
+\seealso{ \code{\link{hadamard}}}
+\examples{
+
+M<-paley(11)
+
+is.hadamard(M)
+## internals of is.hadamard(M)
+H<-2*M-1
+## HH^T is diagonal for any Hadamard matrix
+H\%*\%t(H)
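+
+## A sketch of supplying the prime directly: 211 is prime and
+## 211+1 is divisible by 4, so this gives a matrix of order 212
+M2<-paley(200, prime=211)
+dim(M2)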
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{algebra}
+
diff --git a/man/pchisqsum.Rd b/man/pchisqsum.Rd
new file mode 100644
index 0000000..e75e4b3
--- /dev/null
+++ b/man/pchisqsum.Rd
@@ -0,0 +1,108 @@
+\name{pchisqsum}
+\alias{pchisqsum}
+\alias{pFsum}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Distribution of quadratic forms }
+\description{
+ The distribution of a quadratic form in p standard Normal variables is
+ a linear combination of p chi-squared distributions with 1 df. When
+ there is uncertainty about the variance, a reasonable model for the
+ distribution is a linear combination of F distributions with the same
+ denominator.
+
+}
+\usage{
+pchisqsum(x, df, a, lower.tail = TRUE,
+ method = c("satterthwaite", "integration","saddlepoint"))
+pFsum(x, df, a, ddf=Inf,lower.tail = TRUE,
+ method = c("saddlepoint","integration","satterthwaite"), ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{x}{Observed values}
+ \item{df}{Vector of degrees of freedom}
+ \item{a}{Vector of coefficients }
+ \item{ddf}{Denominator degrees of freedom}
+ \item{lower.tail}{ lower or upper tail? }
+ \item{method}{See Details below}
+ \item{\dots}{arguments to \code{pchisqsum}}
+}
+
+\value{
+Vector of cumulative probabilities
+}
+
+\details{
+ The \code{"satterthwaite"} method uses Satterthwaite's
+ approximation, and this is also used as a fallback for the other
+ methods. The accuracy is usually good, but is more variable depending
+ on \code{a} than the other methods and is anticonservative in the
+ extreme tail. The Satterthwaite approximation requires all \code{a>0}.
+
+ \code{"integration"} requires the \code{CompQuadForm} package. For
+ \code{pchisqsum} it uses Farebrother's algorithm if all
+ \code{a>0}. For \code{pFsum} or when some \code{a<0} it inverts the
+ characteristic function using the algorithm of Davies (1980). If
+ the \code{CompQuadForm} package is not present, a warning is given
+ and the saddlepoint approximation is used. These algorithms are not
+ accurate for very large \code{x} or when some \code{a} are close to
+ zero: a warning is given if the relative error bound is more than
+ 10\% of the result.
+
+ \code{"saddlepoint"} uses Kuonen's saddlepoint approximation. This
+ is accurate even very far out in the upper tail or with some
+ \code{a=0} and does not require any additional packages. It is
+ implemented in pure R and so is slower than the \code{"integration"}
+ method.
+
+ The distribution in \code{pFsum} is standardised so that a likelihood
+ ratio test can use the same \code{x} value as in \code{pchisqsum}.
+ That is, the linear combination of chi-squareds is multiplied by
+ \code{ddf} and then divided by an independent chi-squared with
+ \code{ddf} degrees of freedom.
+
+ }
+
+\references{
+Davies RB (1973). "Numerical inversion of a characteristic function"
+Biometrika 60:415-7
+
+Davies RB (1980) "Algorithm AS 155: The Distribution of a Linear Combination of chi-squared Random Variables"
+Applied Statistics, Vol. 29, No. 3, pp. 323-333
+
+P. Duchesne, P. Lafaye de Micheaux (2010) "Computing the distribution
+of quadratic forms: Further comparisons between the Liu-Tang-Zhang
+approximation and exact methods", Computational Statistics and Data
+Analysis, Volume 54, (2010), 858-862
+
+Farebrother R.W. (1984) "Algorithm AS 204: The distribution of a
+Positive Linear Combination of chi-squared random variables". Applied
+Statistics Vol. 33, No. 3 (1984), p. 332-339
+
+Kuonen D (1999) Saddlepoint Approximations for Distributions of
+Quadratic Forms in Normal Variables. Biometrika, Vol. 86, No. 4
+(Dec., 1999), pp. 929-935
+
+}
+\seealso{\code{\link{pchisq}}}
+\examples{
+x <- 2.7*rnorm(1001)^2+rnorm(1001)^2+0.3*rnorm(1001)^2
+x.thin<-sort(x)[1+(0:50)*20]
+p.invert<-pchisqsum(x.thin,df=c(1,1,1),a=c(2.7,1,.3),method="int" ,lower=FALSE)
+p.satt<-pchisqsum(x.thin,df=c(1,1,1),a=c(2.7,1,.3),method="satt",lower=FALSE)
+p.sadd<-pchisqsum(x.thin,df=c(1,1,1),a=c(2.7,1,.3),method="sad",lower=FALSE)
+
+plot(p.invert, p.satt,type="l",log="xy")
+abline(0,1,lty=2,col="purple")
+plot(p.invert, p.sadd,type="l",log="xy")
+abline(0,1,lty=2,col="purple")
+
+pchisqsum(20, df=c(1,1,1),a=c(2.7,1,.3), lower.tail=FALSE,method="sad")
+pFsum(20, df=c(1,1,1),a=c(2.7,1,.3), ddf=49,lower.tail=FALSE,method="sad")
+pFsum(20, df=c(1,1,1),a=c(2.7,1,.3), ddf=1000,lower.tail=FALSE,method="sad")
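+
+## As a check on the standardisation described in the Details,
+## ddf=Inf should reproduce the pchisqsum value above
+pFsum(20, df=c(1,1,1),a=c(2.7,1,.3), ddf=Inf,lower.tail=FALSE,method="sad")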
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{distribution}% __ONLY ONE__ keyword per line
diff --git a/man/postStratify.Rd b/man/postStratify.Rd
new file mode 100755
index 0000000..bc31772
--- /dev/null
+++ b/man/postStratify.Rd
@@ -0,0 +1,105 @@
+\name{postStratify}
+\alias{postStratify}
+\alias{postStratify.twophase}
+\alias{postStratify.svyrep.design}
+\alias{postStratify.survey.design}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Post-stratify a survey }
+\description{
+ Post-stratification adjusts the sampling and replicate weights so that
+ the joint distribution of a set of post-stratifying variables matches
+ the known population joint distribution. Use \code{\link{rake}} when
+ the full joint distribution is not available.
+}
+\usage{
+postStratify(design, strata, population, partial = FALSE, ...)
+\method{postStratify}{svyrep.design}(design, strata, population, partial = FALSE, compress=NULL,...)
+\method{postStratify}{survey.design}(design, strata, population, partial = FALSE, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{design}{A survey design with replicate weights}
+ \item{strata}{A formula or data frame of post-stratifying variables, which must not contain missing values. }
+ \item{population}{A \code{\link{table}}, \code{\link{xtabs}} or \code{data.frame}
+ with population frequencies }
+ \item{partial}{if \code{TRUE}, ignore population strata not present in
+ the sample}
+ \item{compress}{Attempt to compress the replicate weight matrix? When
+ \code{NULL} will attempt to compress if the original weight matrix
+ was compressed}
+ \item{...}{arguments for future expansion}
+}
+\details{
+ The \code{population} totals can be specified as a table with the
+ strata variables in the margins, or as a data frame where one column
+ lists frequencies and the other columns list the unique combinations
+ of strata variables (the format produced by \code{as.data.frame}
+ acting on a \code{table} object). A table must have named dimnames
+ to indicate the variable names.
+
+ Compressing the replicate weights will take time and may even
+ increase memory use if there is actually little redundancy in the
+ weight matrix (in particular if the post-stratification variables have
+ many values and cut across PSUs).
+
+ If a \code{svydesign} object is to be converted to a replication
+ design the post-stratification should be performed after conversion.
+
+
+ The variance estimate for replication designs follows the same
+ procedure as Valliant (1993) described for estimating totals. Rao et
+ al (2002) describe this procedure for estimating functions (and also
+ the GREG or g-calibration procedure, see \code{\link{calibrate}})
+
+}
+
+\note{
+ If the sampling weights are already post-stratified there will be no
+ change in point estimates after \code{postStratify} but the standard
+ error estimates will decrease to correctly reflect the post-stratification.
+}
+
+\value{
+ A new survey design object.
+}
+\references{
+Valliant R (1993) Post-stratification and conditional variance
+estimation. JASA 88: 89-96
+
+Rao JNK, Yung W, Hidiroglou MA (2002) Estimating equations for the
+analysis of survey data using poststratification information. Sankhya
+64 Series A Part 2, 364-378.
+}
+
+\seealso{
+ \code{\link{rake}}, \code{\link{calibrate}} for other things to do
+ with auxiliary information
+
+ \code{\link{compressWeights}} for information on compressing weights}
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+rclus1<-as.svrepdesign(dclus1)
+
+svymean(~api00, rclus1)
+svytotal(~enroll, rclus1)
+
+# post-stratify on school type
+pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
+#or: pop.types <- xtabs(~stype, data=apipop)
+#or: pop.types <- table(stype=apipop$stype)
+
+rclus1p<-postStratify(rclus1, ~stype, pop.types)
+summary(rclus1p)
+svymean(~api00, rclus1p)
+svytotal(~enroll, rclus1p)
+
+
+## and for svydesign objects
+dclus1p<-postStratify(dclus1, ~stype, pop.types)
+summary(dclus1p)
+svymean(~api00, dclus1p)
+svytotal(~enroll, dclus1p)
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}
diff --git a/man/rake.Rd b/man/rake.Rd
new file mode 100755
index 0000000..01f3c33
--- /dev/null
+++ b/man/rake.Rd
@@ -0,0 +1,114 @@
+\name{rake}
+\alias{rake}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Raking of replicate weight design}
+\description{
+ Raking uses iterative post-stratification to match marginal
+ distributions of a survey sample to known population margins.
+}
+\usage{
+rake(design, sample.margins, population.margins, control = list(maxit =
+10, epsilon = 1, verbose=FALSE), compress=NULL)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{design}{A survey object }
+ \item{sample.margins}{list of formulas or data frames describing
+ sample margins, which must not contain missing values}
+ \item{population.margins}{list of tables or data frames
+ describing corresponding population margins }
+ \item{control}{\code{maxit} controls the number of
+ iterations. Convergence is declared if the maximum change in a table
+ entry is less than \code{epsilon}. If \code{epsilon<1} it is
+ taken to be a fraction of the total sampling weight. }
+ \item{compress}{If \code{design} has replicate weights, attempt to
+ compress the new replicate weight matrix? When \code{NULL}, will
+ attempt to compress if the original weight matrix was compressed}
+}
+\details{
+ The \code{sample.margins} should be in a format suitable for \code{\link{postStratify}}.
+
+ Raking (aka iterative proportional fitting) is known to converge for
+ any table without zeros, and for any table with zeros for which there
+ is a joint distribution with the given margins and the same pattern of
+ zeros. The `margins' need not be one-dimensional.
+
+ The algorithm works by repeated calls to \code{\link{postStratify}}
+ (iterative proportional fitting), which is efficient for large
+ multiway tables. For small tables \code{\link{calibrate}} will be
+ faster, and also allows raking to population totals for continuous
+ variables, and raking with bounded weights.
+}
+\value{
+ A raked survey design.
+}
+
+\seealso{
+ \code{\link{postStratify}}, \code{\link{compressWeights}}
+
+ \code{\link{calibrate}} for other ways to use auxiliary information.
+}
+\examples{
+data(api)
+dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+rclus1 <- as.svrepdesign(dclus1)
+
+svymean(~api00, rclus1)
+svytotal(~enroll, rclus1)
+
+## population marginal totals for each stratum
+pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
+pop.schwide <- data.frame(sch.wide=c("No","Yes"), Freq=c(1072,5122))
+
+rclus1r <- rake(rclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))
+
+svymean(~api00, rclus1r)
+svytotal(~enroll, rclus1r)
+
+## marginal totals correspond to population
+xtabs(~stype, apipop)
+svytable(~stype, rclus1r, round=TRUE)
+xtabs(~sch.wide, apipop)
+svytable(~sch.wide, rclus1r, round=TRUE)
+
+## joint totals don't correspond
+xtabs(~stype+sch.wide, apipop)
+svytable(~stype+sch.wide, rclus1r, round=TRUE)
+
+## Do it for a design without replicate weights
+dclus1r<-rake(dclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))
+
+svymean(~api00, dclus1r)
+svytotal(~enroll, dclus1r)
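+
+## Sketch: a tighter convergence criterion via the control argument
+## (epsilon<1 is interpreted as a fraction of total sampling weight)
+dclus1rt<-rake(dclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide),
+   control=list(maxit=50, epsilon=0.0001))
+svymean(~api00, dclus1rt)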
+
+## compare to raking with calibrate()
+dclus1gr<-calibrate(dclus1, ~stype+sch.wide, pop=c(6194, 755,1018,5122),
+ calfun="raking")
+svymean(~stype+api00, dclus1r)
+svymean(~stype+api00, dclus1gr)
+
+## compare to joint post-stratification
+## (only possible if joint population table is known)
+##
+pop.table <- xtabs(~stype+sch.wide,apipop)
+rclus1ps <- postStratify(rclus1, ~stype+sch.wide, pop.table)
+svytable(~stype+sch.wide, rclus1ps, round=TRUE)
+
+svymean(~api00, rclus1ps)
+svytotal(~enroll, rclus1ps)
+
+## Example of raking with partial joint distributions
+pop.imp<-data.frame(comp.imp=c("No","Yes"),Freq=c(1712,4482))
+dclus1r2<-rake(dclus1, list(~stype+sch.wide, ~comp.imp),
+ list(pop.table, pop.imp))
+svymean(~api00, dclus1r2)
+
+## compare to calibrate() syntax with tables
+dclus1r2<-calibrate(dclus1, formula=list(~stype+sch.wide, ~comp.imp),
+ population=list(pop.table, pop.imp),calfun="raking")
+svymean(~api00, dclus1r2)
+
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}
diff --git a/man/regTermTest.Rd b/man/regTermTest.Rd
new file mode 100755
index 0000000..66d0e37
--- /dev/null
+++ b/man/regTermTest.Rd
@@ -0,0 +1,66 @@
+\name{regTermTest}
+\alias{regTermTest}
+\alias{print.regTermTest}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Wald test for a term in a regression model}
+\description{
+ Provides Wald test and working likelihood ratio (Rao-Scott) test of the
+ hypothesis that all coefficients associated with a particular
+ regression term are zero (or have some other specified
+ values). Particularly useful as a substitute for \code{\link{anova}}
+ when not fitting by maximum likelihood. The Wald tests use a
+ chisquared or F distribution, the LRT uses a linear combination of
+ chisquared or F distributions as in \code{\link{pchisqsum}}.
+}
+\usage{
+regTermTest(model, test.terms, null=NULL,df=NULL,
+method=c("Wald","LRT"), lrt.approximation="saddlepoint")
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{model}{A model object with \code{\link{coef}} and \code{\link{vcov}} methods}
+ \item{test.terms}{Character string or one-sided formula giving name of
+ term or terms to test}
+ \item{null}{Null hypothesis values for parameters. Default is zeros}
+ \item{df}{Denominator degrees of freedom for an F test. If
+ \code{NULL} these are estimated from the model. Use \code{Inf} for a
+ chi-squared test.}
+\item{method}{If \code{"Wald"}, the Wald-type test; if \code{"LRT"}
+ the Rao-Scott test based on the estimated log likelihood ratio}
+\item{lrt.approximation}{method for approximating the distribution of
+ the LRT statistic; see \code{\link{pchisqsum}}}
+}
+\value{
+ An object of class \code{regTermTest} or \code{regTermTestLRT}.
+}
+\references{
+Rao, JNK, Scott, AJ (1984) "On Chi-squared Tests For Multiway Contingency Tables with Proportions Estimated From Survey Data" Annals of Statistics 12:46-60.
+
+Lumley T, Scott A (2012) "Partial likelihood ratio tests for the Cox model under complex sampling" Statistics in Medicine 17 JUL 2012. DOI: 10.1002/sim.5492
+}
+\note{
+The \code{"LRT"} method will not work if the model had starting values supplied for the regression coefficients. Instead, fit the two models separately and use \code{anova(model1, model2, force=TRUE)}
+
+}
+\seealso{\code{\link{anova}}, \code{\link{vcov}}, \code{\link{contrasts}},\code{\link{pchisqsum}}}
+\examples{
+ data(esoph)
+ model1 <- glm(cbind(ncases, ncontrols) ~ agegp + tobgp *
+ alcgp, data = esoph, family = binomial())
+ anova(model1)
+
+ regTermTest(model1,"tobgp")
+ regTermTest(model1,"tobgp:alcgp")
+ regTermTest(model1, ~alcgp+tobgp:alcgp)
+
+
+ data(api)
+ dclus2<-svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
+ model2<-svyglm(I(sch.wide=="Yes")~ell+meals+mobility, design=dclus2, family=quasibinomial())
+ regTermTest(model2, ~ell)
+ regTermTest(model2, ~ell,df=NULL)
+ regTermTest(model2, ~ell, method="LRT", df=Inf)
+ regTermTest(model2, ~ell+meals, method="LRT", df=NULL)
+}
+\keyword{regression}% at least one, from doc/KEYWORDS
+
diff --git a/man/scd.Rd b/man/scd.Rd
new file mode 100755
index 0000000..44d8c4b
--- /dev/null
+++ b/man/scd.Rd
@@ -0,0 +1,62 @@
+\name{scd}
+\alias{scd}
+\non_function{}
+\title{Survival in cardiac arrest}
+\usage{data(scd)}
+\description{
+These data are from Section 12.2 of Levy and Lemeshow. They describe
+a (possibly apocryphal) study of survival in out-of-hospital cardiac
+arrest. Two out of five ambulance stations were sampled from each of
+three emergency service areas.
+}
+\format{
+ This data frame contains the following columns:
+ \describe{
+ \item{ESA}{Emergency Service Area (strata)}
+ \item{ambulance}{Ambulance station (PSU)}
+ \item{arrests}{estimated number of cardiac arrests}
+ \item{alive}{number reaching hospital alive}
+ }
+}
+
+\source{
+Levy and Lemeshow. "Sampling of Populations" (3rd edition). Wiley.
+}
+\examples{
+data(scd)
+
+## survey design objects
+scddes<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
+nest=TRUE, fpc=rep(5,6))
+scdnofpc<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
+nest=TRUE)
+
+# convert to BRR replicate weights
+scd2brr <- as.svrepdesign(scdnofpc, type="BRR")
+# or to Rao-Wu bootstrap
+scd2boot <- as.svrepdesign(scdnofpc, type="subboot")
+
+# use BRR replicate weights from Levy and Lemeshow
+repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
+c(0,1,0,1,1,0))
+scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)
+
+# ratio estimates
+svyratio(~alive, ~arrests, design=scddes)
+svyratio(~alive, ~arrests, design=scdnofpc)
+svyratio(~alive, ~arrests, design=scd2brr)
+svyratio(~alive, ~arrests, design=scd2boot)
+svyratio(~alive, ~arrests, design=scdrep)
+
+# or a logistic regression
+summary(svyglm(cbind(alive,arrests-alive)~1, family=quasibinomial, design=scdnofpc))
+summary(svyglm(cbind(alive,arrests-alive)~1, family=quasibinomial, design=scdrep))
+
+# Because no sampling weights are given, can't compute design effects
+# without replacement: use deff="replace"
+
+svymean(~alive+arrests, scddes, deff=TRUE)
+svymean(~alive+arrests, scddes, deff="replace")
+
+}
+\keyword{datasets}
diff --git a/man/stratsample.Rd b/man/stratsample.Rd
new file mode 100644
index 0000000..c3756a2
--- /dev/null
+++ b/man/stratsample.Rd
@@ -0,0 +1,42 @@
+\name{stratsample}
+\Rdversion{1.1}
+\alias{stratsample}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Take a stratified sample
+}
+\description{
+This function takes a stratified sample without replacement from a data set.
+}
+\usage{
+stratsample(strata, counts)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{strata}{
+Vector of stratum identifiers; will be coerced to character
+}
+ \item{counts}{
+ named vector of stratum sample sizes, with names corresponding to the values of \code{as.character(strata)}
+}
+}
+
+\value{
+vector of indices into \code{strata} giving the sample
+}
+
+
+\seealso{
+\code{\link{sample}}
+
+The "sampling" package has many more sampling algorithms.
+}
+\examples{
+ data(api)
+ s<-stratsample(apipop$stype, c("E"=5,"H"=4,"M"=2))
+ table(apipop$stype[s])
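+ ## a sketch of using the returned indices to extract the sample
+ sample.df<-apipop[s,]
+ table(sample.df$stype)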
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/subset.survey.design.Rd b/man/subset.survey.design.Rd
new file mode 100755
index 0000000..2b9bd24
--- /dev/null
+++ b/man/subset.survey.design.Rd
@@ -0,0 +1,48 @@
+\name{subset.survey.design}
+\alias{subset.survey.design}
+\alias{subset.svyrep.design}
+\alias{[.survey.design}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Subset of survey}
+\description{
+Restrict a survey design to a subpopulation, keeping the original design
+information about numbers of clusters and strata. If the design has no
+post-stratification or calibration data, the subset will use
+proportionately less memory.
+}
+\usage{
+\method{subset}{survey.design}(x, subset, ...)
+\method{subset}{svyrep.design}(x, subset, ...)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{x}{A survey design object}
+ \item{subset}{An expression specifying the subpopulation}
+ \item{\dots}{Arguments not used by this method}
+}
+\value{
+ A new survey design object
+}
+
+\seealso{\code{\link{svydesign}}}
+
+\examples{
+data(fpc)
+dfpc<-svydesign(id=~psuid,strata=~stratid,weights=~weight,data=fpc,nest=TRUE)
+dsub<-subset(dfpc,x>4)
+summary(dsub)
+svymean(~x,design=dsub)
+
+## These should give the same domain estimates and standard errors
+svyby(~x,~I(x>4),design=dfpc, svymean)
+summary(svyglm(x~I(x>4)+0,design=dfpc))
+
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+rclus1<-as.svrepdesign(dclus1)
+svymean(~enroll, subset(dclus1, sch.wide=="Yes" & comp.imp=="Yes"))
+svymean(~enroll, subset(rclus1, sch.wide=="Yes" & comp.imp=="Yes"))
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/surveyoptions.Rd b/man/surveyoptions.Rd
new file mode 100644
index 0000000..1f6745a
--- /dev/null
+++ b/man/surveyoptions.Rd
@@ -0,0 +1,82 @@
+\name{surveyoptions}
+\alias{surveyoptions}
+\alias{survey.lonely.psu}
+\alias{survey.ultimate.cluster}
+\alias{survey.adjust.domain.lonely}
+\alias{survey.want.obsolete}
+\alias{survey.drop.replicates}
+\alias{survey.multicore}
+\alias{survey.replicates.mse}
+\title{Options for the survey package}
+\description{
+ This help page documents the options that control the behaviour of
+ the survey package.
+ }
+
+ \details{
+ All the options for the survey package have names beginning with
+ "survey". Four of them control standard error estimation.
+
+ \code{options("survey.replicates.mse")} controls the default in
+ \code{svrepdesign} and \code{as.svrepdesign} for computing
+ variances. When \code{options("survey.replicates.mse")} is
+ \code{TRUE}, the default is to create replicate weight designs that
+ compute variances centered at the point estimate, rather than at the
+ mean of the replicates. The option can be overridden by specifying
+ the \code{mse} argument explicitly in \code{svrepdesign} and
+ \code{as.svrepdesign}. The default is \code{FALSE}.
+
+ When \code{options("survey.ultimate.cluster")} is \code{TRUE},
+ standard error estimation is based on independence of PSUs at the
+ first stage of sampling, without using any information about
+ subsequent stages. When \code{FALSE}, finite population corrections
+ and variances are estimated recursively. See \code{\link{svyrecvar}}
+ for more information. This option makes no difference unless
+ first-stage finite population corrections are specified, in which
+ case setting the option to \code{TRUE} gives the wrong answer for a
+ multistage study. The only reason to use \code{TRUE} is for
+ compatibility with other software that gives the wrong answer.
+
+ Handling of strata with a single PSU that are not certainty PSUs is
+ controlled by \code{options("survey.lonely.psu")}. The default
+ setting is \code{"fail"}, which gives an error. Use \code{"remove"}
+ to ignore that PSU for variance computation, \code{"adjust"} to
+ center the stratum at the population mean rather than the stratum
+ mean, and \code{"average"} to replace the variance contribution of
+ the stratum by the average variance contribution across strata. As
+ of version 3.4-2 \code{as.svrepdesign} also uses this option.
+
+ The variance formulas for domain estimation give well-defined,
+ positive results when a stratum contains only one PSU with
+ observations in the domain, but are not unbiased. If
+ \code{options("survey.adjust.domain.lonely")} is \code{TRUE} and
+ \code{options("survey.lonely.psu")} is \code{"average"} or
+ \code{"adjust"} the same adjustment for lonely PSUs will be used
+ within a domain. Note that this adjustment is not available for
+ replicate-weight designs, nor (currently) for raked,
+ post-stratified, or calibrated designs.
+
+ The option \code{options("survey.want.obsolete")} controls the
+ warnings about using the deprecated pre-2.9.0 survey design objects.
+
+ The behaviour of replicate-weight designs for self-representing
+ strata is controlled by \code{options("survey.drop.replicates")}.
+ When \code{TRUE}, various optimizations are used that take advantage
+ of the fact that these strata do not contribute to the variance.
+ The only reason ever to use \code{FALSE} is if there is a bug in
+ the code for these optimizations.
+
+ The option \code{options("survey.multicore")} controls the use of
+ multiple processors with the \code{multicore} package. This option
+ should not affect the values
+ computed by any of the survey functions. If \code{TRUE}, all
+ functions that are able to use multiple processors will do so by
+ default. Using multiple processors may speed up calculations, but
+ need not, especially if the computer is short on memory. The best
+ strategy is probably to experiment with explicitly requesting
+ \code{multicore=TRUE} in functions that support it, to see if there
+ is an increase in speed before setting the global option.
+
+ }
+
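+ \examples{
+ ## A sketch of inspecting and setting one of these options,
+ ## restoring the previous value afterwards
+ oldopt<-options(survey.lonely.psu="adjust")
+ getOption("survey.lonely.psu")
+ options(oldopt)
+ }
+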
+ \keyword{survey}
diff --git a/man/surveysummary.Rd b/man/surveysummary.Rd
new file mode 100755
index 0000000..f2c6c4d
--- /dev/null
+++ b/man/surveysummary.Rd
@@ -0,0 +1,221 @@
+\name{surveysummary}
+\alias{svymean}
+\alias{svymean.survey.design}
+\alias{svymean.survey.design2}
+\alias{svymean.svyrep.design}
+\alias{svymean.twophase}
+\alias{svytotal}
+\alias{svytotal.twophase}
+\alias{svytotal.survey.design}
+\alias{svytotal.survey.design2}
+\alias{svytotal.svyrep.design}
+\alias{svyvar}
+\alias{svyvar.survey.design}
+\alias{svyvar.svyrep.design}
+\alias{coef.svystat}
+\alias{vcov.svystat}
+\alias{coef.svrepstat}
+\alias{vcov.svrepstat}
+\alias{cv.svyratio}
+\alias{cv.svrepratio}
+\alias{cv.svrepstat}
+\alias{cv.svystat}
+\alias{cv.default}
+\alias{cv}
+\alias{deff}
+\alias{deff.default}
+\alias{confint.svystat}
+\alias{confint.svrepstat}
+\alias{make.formula}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Summary statistics for sample surveys}
+\description{
+Compute means, variances, ratios and totals for data from complex surveys.
+}
+\usage{
+\method{svymean}{survey.design}(x, design, na.rm=FALSE,deff=FALSE,...)
+\method{svymean}{twophase}(x, design, na.rm=FALSE,deff=FALSE,...)
+\method{svymean}{svyrep.design}(x, design, na.rm=FALSE, rho=NULL,
+ return.replicates=FALSE, deff=FALSE,...)
+\method{svyvar}{survey.design}(x, design, na.rm=FALSE,...)
+\method{svyvar}{svyrep.design}(x, design, na.rm=FALSE, rho=NULL,
+ return.replicates=FALSE,...,estimate.only=FALSE)
+\method{svytotal}{survey.design}(x, design, na.rm=FALSE,deff=FALSE,...)
+\method{svytotal}{twophase}(x, design, na.rm=FALSE,deff=FALSE,...)
+\method{svytotal}{svyrep.design}(x, design, na.rm=FALSE, rho=NULL,
+ return.replicates=FALSE, deff=FALSE,...)
+\method{coef}{svystat}(object,...)
+\method{coef}{svrepstat}(object,...)
+\method{vcov}{svystat}(object,...)
+\method{vcov}{svrepstat}(object,...)
+\method{confint}{svystat}(object, parm, level = 0.95,df =Inf,...)
+\method{confint}{svrepstat}(object, parm, level = 0.95,df =Inf,...)
+cv(object,...)
+deff(object, quietly=FALSE,...)
+make.formula(names)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{x}{A formula, vector or matrix}
+ \item{design}{\code{survey.design} or \code{svyrep.design} object}
+ \item{na.rm}{Should cases with missing values be dropped?}
+ \item{rho}{parameter for Fay's variance estimator in a BRR design}
+ \item{return.replicates}{Return the replicate means?}
+ \item{deff}{Return the design effect (see below)}
+ \item{object}{The result of one of the other survey summary functions}
+ \item{quietly}{Don't warn when there is no design effect computed}
+ \item{estimate.only}{Don't compute standard errors (useful when
+ \code{svyvar} is used to estimate the design effect)}
+ \item{parm}{a specification of which parameters are to be given
+ confidence intervals, either a vector of numbers or a vector of
+ names. If missing, all parameters are considered.}
+ \item{level}{the confidence level required.}
+ \item{df}{degrees of freedom for t-distribution in confidence
+ interval, use \code{degf(design)} for number of PSUs minus number of
+ strata}
+ \item{...}{additional arguments to methods, not currently
+ used}
+ \item{names}{vector of character strings}
+}
+\details{
+
+These functions perform weighted estimation, with each observation being
+weighted by the inverse of its sampling probability. Except for the
+table functions, these also give precision estimates that incorporate
+the effects of stratification and clustering.
+
+Factor variables are converted to sets of indicator variables for each
+category in computing means and totals. Combining this with the
+\code{\link{interaction}} function allows crosstabulations. See
+\code{\link{ftable.svystat}} for formatting the output.
+
+With \code{na.rm=TRUE}, all cases with missing data are removed. With
+\code{na.rm=FALSE} cases with missing data are not removed and so will
+produce missing results. When using replicate weights and
+\code{na.rm=FALSE} it may be useful to set
+\code{options(na.action="na.pass")}, otherwise all replicates with any
+missing results will be discarded.
+
+The \code{svytotal} and \code{svreptotal} functions estimate a
+population total. Use \code{predict} on \code{\link{svyratio}} and
+\code{\link{svyglm}} to get ratio or regression estimates of totals.
+
+\code{svyvar} estimates the population variance. The object returned
+includes the full matrix of estimated population variances and
+covariances, but by default only the diagonal elements are printed. To
+display the whole matrix use \code{as.matrix(v)} or \code{print(v,
+covariance=TRUE)}.
+
+The design effect compares the variance of a mean or total to the
+variance from a study of the same size using simple random sampling
+without replacement. Note that the design effect will be incorrect if
+the weights have been rescaled so that they are not reciprocals of
+sampling probabilities. To obtain an estimate of the design effect
+comparing to simple random sampling with replacement, which does not
+have this requirement, use \code{deff="replace"}. This with-replacement
+design effect is the square of Kish's "deft".
+
+The design effect for a subset of a design conditions on the size of
+the subset. That is, it compares the variance of the estimate to the
+variance of an estimate based on a simple random sample of the same
+size as the subset, taken from the subpopulation. So, for example,
+under stratified random sampling the design effect in a subset
+consisting of a single stratum will be 1.0.
+
+
+The \code{cv} function computes the coefficient of variation of a
+statistic such as ratio, mean or total. The default method is for any
+object with methods for \code{\link{SE}} and \code{coef}.
+
+\code{make.formula} makes a formula from a vector of names. This is
+useful because formulas are the best way to specify variables to the
+survey functions.
+
+}
+\value{
+ Objects of class \code{"svystat"} or \code{"svrepstat"},
+ which are vectors with a \code{"var"} attribute giving the variance
+ and a \code{"statistic"} attribute giving the name of the
+ statistic.
+
+ These objects have methods for \code{vcov}, \code{SE}, \code{coef},
+ \code{confint}, \code{svycontrast}.
+
+}
+
+\author{Thomas Lumley}
+
+\seealso{ \code{\link{svydesign}}, \code{\link{as.svrepdesign}},
+ \code{\link{svrepdesign}} for constructing design objects.
+
+ \code{\link{degf}} to extract degrees of freedom from a design.
+
+ \code{\link{svyquantile}} for quantiles
+
+ \code{\link{ftable.svystat}} for more attractive tables
+
+ \code{\link{svyciprop}} for more accurate confidence intervals for
+ proportions near 0 or 1.
+
+ \code{\link{svyttest}} for comparing two means.
+
+ \code{\link{svycontrast}} for linear and nonlinear functions of estimates.
+}
+
+\examples{
+
+ data(api)
+
+ ## one-stage cluster sample
+ dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+ svymean(~api00, dclus1, deff=TRUE)
+ svymean(~factor(stype),dclus1)
+ svymean(~interaction(stype, comp.imp), dclus1)
+ svyquantile(~api00, dclus1, c(.25,.5,.75))
+ svytotal(~enroll, dclus1, deff=TRUE)
+ svyratio(~api.stu, ~enroll, dclus1)
+
+ v<-svyvar(~api00+api99, dclus1)
+ v
+ print(v, cov=TRUE)
+ as.matrix(v)
+
+
+ # replicate weights - jackknife (this is slower)
+ dstrat<-svydesign(id=~1,strata=~stype, weights=~pw,
+ data=apistrat, fpc=~fpc)
+ jkstrat<-as.svrepdesign(dstrat)
+
+ svymean(~api00, jkstrat)
+ svymean(~factor(stype),jkstrat)
+ svyvar(~api00+api99,jkstrat)
+
+ svyquantile(~api00, jkstrat, c(.25,.5,.75))
+ svytotal(~enroll, jkstrat)
+ svyratio(~api.stu, ~enroll, jkstrat)
+
+ # coefficients of variation
+ cv(svytotal(~enroll,dstrat))
+ cv(svyratio(~api.stu, ~enroll, jkstrat))
+
+ # extracting information from the results
+ coef(svytotal(~enroll,dstrat))
+ vcov(svymean(~api00+api99,jkstrat))
+ SE(svymean(~enroll, dstrat))
+ confint(svymean(~api00+api99, dclus1))
+ confint(svymean(~api00+api99, dclus1), df=degf(dclus1))
+
+ # Design effect
+ svymean(~api00, dstrat, deff=TRUE)
+ svymean(~api00, dstrat, deff="replace")
+ svymean(~api00, jkstrat, deff=TRUE)
+ svymean(~api00, jkstrat, deff="replace")
+ (a<-svytotal(~enroll, dclus1, deff=TRUE))
+ deff(a)
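+
+ # make.formula: a sketch of building the formula from variable names
+ vars<-c("api00","api99")
+ svymean(make.formula(vars), jkstrat)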
+
+
+ }
+
+\keyword{univar}% at least one, from doc/KEYWORDS
+\keyword{survey}% __ONLY ONE__ keyword per line
diff --git a/man/svrVar.Rd b/man/svrVar.Rd
new file mode 100755
index 0000000..487824f
--- /dev/null
+++ b/man/svrVar.Rd
@@ -0,0 +1,35 @@
+\name{svrVar}
+\alias{svrVar}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Compute variance from replicates }
+\description{
+Compute an appropriately scaled empirical variance estimate from
+replicates. The \code{mse} argument specifies whether the sums of
+squares should be centered at the point estimate (\code{mse=TRUE}) or
+at the mean of the replicates (\code{mse=FALSE}). It is usually taken
+from the \code{mse} component of the design object.
+}
+\usage{
+svrVar(thetas, scale, rscales, na.action=getOption("na.action"),
+ mse=getOption("survey.replicates.mse"),coef)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{thetas}{matrix whose rows are replicates (or a vector of replicates)}
+ \item{scale}{Overall scaling factor}
+ \item{rscales}{Scaling factor for each squared deviation }
+ \item{na.action}{How to handle replicates where the statistic could
+ not be estimated}
+ \item{mse}{if \code{TRUE}, center at the point estimate; if
+ \code{FALSE}, center at the mean of the replicates}
+ \item{coef}{The point estimate, required only if \code{mse==TRUE}}
+}
+\value{
+covariance matrix.
+}
+\seealso{\code{\link{svrepdesign}}, \code{\link{as.svrepdesign}},
+ \code{\link{brrweights}},
+ \code{\link{jk1weights}}, \code{\link{jknweights}}}
+
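+\examples{
+## A minimal sketch of what svrVar computes (not from the package
+## sources; assumes the factory default mse=FALSE, centring at the
+## mean of the replicates)
+data(api)
+dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+rclus1 <- as.svrepdesign(dclus1)  ## JK1 jackknife replicates
+## replicate estimates of the mean of api00
+thetas <- apply(as.matrix(weights(rclus1, type="analysis")), 2,
+                function(w) sum(w*apiclus1$api00)/sum(w))
+svrVar(thetas, rclus1$scale, rclus1$rscales)
+## agrees with the variance reported by svymean
+SE(svymean(~api00, rclus1))^2
+}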
+\keyword{survey}% at least one, from doc/KEYWORDS
+
diff --git a/man/svrepdesign.Rd b/man/svrepdesign.Rd
new file mode 100755
index 0000000..508f83b
--- /dev/null
+++ b/man/svrepdesign.Rd
@@ -0,0 +1,182 @@
+\name{svrepdesign}
+\alias{svrepdesign}
+\alias{svrepdesign.default}
+\alias{svrepdesign.imputationList}
+\alias{svrepdesign.character}
+\alias{[.svyrep.design}
+\alias{image.svyrep.design}
+\alias{print.svyrep.design}
+\alias{model.frame.svyrep.design}
+\alias{summary.svyrep.design}
+\alias{print.summary.svyrep.design}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Specify survey design with replicate weights}
+\description{
+Some recent large-scale surveys specify replication weights rather than
+the sampling design (partly for privacy reasons). This function specifies the
+data structure for such a survey.
+}
+\usage{
+svrepdesign(variables , repweights , weights, data,...)
+\method{svrepdesign}{default}(variables = NULL, repweights = NULL, weights = NULL,
+ data = NULL, type = c("BRR", "Fay", "JK1","JKn","bootstrap","other"),
+ combined.weights=TRUE, rho = NULL, bootstrap.average=NULL,
+ scale=NULL, rscales=NULL,fpc=NULL, fpctype=c("fraction","correction"),
+ mse=getOption("survey.replicates.mse"),...)
+\method{svrepdesign}{imputationList}(variables=NULL, repweights,weights,data,
+ mse=getOption("survey.replicates.mse"),...)
+\method{svrepdesign}{character}(variables=NULL,repweights=NULL, weights=NULL,data=NULL,
+type=c("BRR","Fay","JK1", "JKn","bootstrap","other"),combined.weights=TRUE, rho=NULL,
+bootstrap.average=NULL, scale=NULL,rscales=NULL,fpc=NULL,
+fpctype=c("fraction","correction"),mse=getOption("survey.replicates.mse"),
+ dbtype="SQLite", dbname,...)
+
+\method{image}{svyrep.design}(x, ..., col=grey(seq(.5,1,length=30)), type.=c("rep","total"))
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{variables}{formula or data frame specifying variables to include in the design (default is all) }
+ \item{repweights}{formula or data frame specifying replication weights, or character string specifying a regular expression that matches the names of the replication weight variables }
+ \item{weights}{sampling weights }
+ \item{data}{data frame to look up variables in formulas, or character string giving name of database table}
+ \item{type}{Type of replication weights}
+ \item{combined.weights}{\code{TRUE} if the \code{repweights} already
+ include the sampling weights. This is usually the case.}
+ \item{rho}{Shrinkage factor for weights in Fay's method}
+ \item{bootstrap.average}{For \code{type="bootstrap"}, if the bootstrap
+ weights have been averaged, gives the number of iterations averaged over}
+  \item{scale, rscales}{Scaling constants for the variance; see Details
+below}
+\item{fpc,fpctype}{Finite population correction information}
+\item{mse}{If \code{TRUE}, compute variances based on sum of squares
+ around the point estimate, rather than the mean of the replicates}
+\item{dbname}{name of database, passed to \code{DBI::dbConnect()}}
+\item{dbtype}{Database driver: see Details}
+\item{x}{survey design with replicate weights}
+\item{...}{Other arguments to \code{\link{image}}}
+\item{col}{Colors}
+\item{type.}{\code{"rep"} for only the replicate weights, \code{"total"} for the replicate and sampling weights combined.}
+}
+\details{
+ In the BRR method, the dataset is split into halves, and the
+ difference between halves is used to estimate the variance. In Fay's
+ method, rather than removing observations from half the sample they
+ are given weight \code{rho} in one half-sample and \code{2-rho} in the
+ other. The ideal BRR analysis is restricted to a design where each
+  stratum has two PSUs; however, it has been used in a much wider class
+ of surveys.
+
+ The JK1 and JKn types are both jackknife estimators deleting one
+ cluster at a time. JKn is designed for stratified and JK1 for
+ unstratified designs.
+
+ Averaged bootstrap weights ("mean bootstrap") are used for some
+ surveys from Statistics Canada. Yee et al (1999) describe their
+ construction and use for one such survey.
+
+ The variance is computed as the sum of squared deviations of the
+ replicates from their mean. This may be rescaled: \code{scale} is an
+ overall multiplier and \code{rscales} is a vector of
+ replicate-specific multipliers for the squared deviations. That is,
+  \code{rscales} should have one entry for each column of \code{repweights}.
+  If the replication weights incorporate the sampling weights
+  (\code{combined.weights=TRUE}) or if \code{type="other"}, these must
+  be specified; otherwise they can be guessed from the weights.
+
+ A finite population correction may be specified for \code{type="other"},
+ \code{type="JK1"} and \code{type="JKn"}. \code{fpc} must be a vector
+ with one entry for each replicate. To specify sampling fractions use
+ \code{fpctype="fraction"} and to specify the correction directly use
+ \code{fpctype="correction"}
+
+\code{repweights} may be a character string giving a regular expression
+ for the replicate weight variables. For example, in the
+California Health Interview Survey public-use data, the sampling weights are
+\code{"rakedw0"} and the replicate weights are \code{"rakedw1"} to
+\code{"rakedw80"}. The regular expression \code{"rakedw[1-9]"}
+matches the replicate weight variables (and not the sampling weight
+variable).
+
+\code{data} may be a character string giving the name of a table or view
+in a relational database that can be accessed through the \code{DBI} or \code{ODBC}
+interfaces. For DBI interfaces \code{dbtype} should be the name of the database
+driver and \code{dbname} should be the name by which the driver identifies
+the specific database (eg file name for SQLite). For ODBC databases
+\code{dbtype} should be \code{"ODBC"} and \code{dbname} should be the
+registered DSN for the database. On the Windows GUI, \code{dbname=""} will
+produce a dialog box for interactive selection.
+
+The appropriate database interface package must already be loaded (eg
+\code{RSQLite} for SQLite, \code{RODBC} for ODBC). The survey design
+object will contain the replicate weights, but actual variables will
+be loaded from the database only as needed. Use
+\code{\link[=close.DBIsvydesign]{close}} to close the database connection and
+\code{\link[=open.DBIsvydesign]{open}} to reopen the connection, eg, after
+loading a saved object.
+
+The database interface does not attempt to modify the underlying
+database and so can be used with read-only permissions on the database.
+
+
+ To generate your own replicate weights either use
+ \code{\link{as.svrepdesign}} on a \code{survey.design} object, or see
+ \code{\link{brrweights}}, \code{\link{bootweights}},
+ \code{\link{jk1weights}} and \code{\link{jknweights}}
+
+ The \code{model.frame} method extracts the observed data.
+
+}
+\value{
+ Object of class \code{svyrep.design}, with methods for \code{print},
+ \code{summary}, \code{weights}, \code{image}.
+}
+\references{Levy and Lemeshow. "Sampling of Populations". Wiley.
+
+ Shao and Tu. "The Jackknife and Bootstrap." Springer.
+
+  Yee et al (1999). Bootstrap Variance Estimation for the National
+ Population Health Survey. Proceedings of the ASA Survey Research
+ Methodology Section. \url{https://web.archive.org/web/20151110170959/http://www.amstat.org/sections/SRMS/Proceedings/papers/1999_136.pdf}
+}
+
+\note{To use replication-weight analyses on a survey specified by
+ sampling design, use \code{as.svrepdesign} to convert it. }
+\note{The successive difference weights in the American Community Survey
+ use \code{scale = 4/ncol(repweights)} and \code{rscales=rep(1,
+  ncol(repweights))}. JK2 weights use \code{scale=1},
+ \code{rscales=rep(1, ncol(repweights))}}
+
+\seealso{\code{\link{as.svrepdesign}}, \code{\link{svydesign}},
+  \code{\link{brrweights}}, \code{\link{bootweights}} }
+
+\examples{
+data(scd)
+# use BRR replicate weights from Levy and Lemeshow
+repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
+c(0,1,0,1,1,0))
+scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights, combined.weights=FALSE)
+svyratio(~alive, ~arrests, scdrep)
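+
+## a sketch of the variance formula, scale*sum(rscales*(theta-centre)^2)
+## (not from the package sources; assumes the factory default mse=FALSE,
+## which centres at the mean of the replicates)
+repwts<-as.matrix(weights(scdrep, type="analysis"))
+thetas<-apply(repwts, 2, function(w) sum(w*scd$alive)/sum(w*scd$arrests))
+scdrep$scale * sum(scdrep$rscales * (thetas - mean(thetas))^2)
+SE(svyratio(~alive, ~arrests, scdrep))^2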
+
+\dontrun{
+## Needs RSQLite
+library(RSQLite)
+db_rclus1<-svrepdesign(weights=~pw, repweights="wt[1-9]+", type="JK1", scale=(1-15/757)*14/15,
+data="apiclus1rep",dbtype="SQLite", dbname=system.file("api.db",package="survey"), combined=FALSE)
+svymean(~api00+api99,db_rclus1)
+
+summary(db_rclus1)
+
+## closing and re-opening a connection
+close(db_rclus1)
+db_rclus1
+try(svymean(~api00+api99,db_rclus1))
+db_rclus1<-open(db_rclus1)
+svymean(~api00+api99,db_rclus1)
+
+
+
+}
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+
diff --git a/man/svy.varcoef.Rd b/man/svy.varcoef.Rd
new file mode 100755
index 0000000..2ea4fd9
--- /dev/null
+++ b/man/svy.varcoef.Rd
@@ -0,0 +1,25 @@
+\name{svy.varcoef}
+\alias{svy.varcoef}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Sandwich variance estimator for glms}
+\description{
+Computes the sandwich variance estimator for a generalised linear model fitted to data from a complex sample survey. Designed to be used internally by \code{\link{svyglm}}.
+}
+\usage{
+svy.varcoef(glm.object, design)
+}
+\arguments{
+ \item{glm.object}{A \code{\link{glm}} object}
+ \item{design}{A \code{survey.design} object }
+}
+\value{
+ A variance matrix
+}
+\author{ Thomas Lumley}
+
+
+\seealso{\code{\link{svyglm}},\code{\link{svydesign}}, \code{\link{svyCprod}} }
+
+
+\keyword{regression}% at least one, from doc/KEYWORDS
+\keyword{survey}% __ONLY ONE__ keyword per line
diff --git a/man/svyCprod.Rd b/man/svyCprod.Rd
new file mode 100755
index 0000000..1aa4f6a
--- /dev/null
+++ b/man/svyCprod.Rd
@@ -0,0 +1,100 @@
+\name{svyCprod}
+\alias{svyCprod}
+\alias{onestage}
+\alias{onestrat}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Computations for survey variances}
+\description{
+Computes the sum of products needed for the variance of survey sample
+estimators. \code{svyCprod} is used for survey design objects from
+before version 2.9, \code{onestage} is called by \code{\link{svyrecvar}}
+for post-2.9 design objects.
+}
+\usage{
+svyCprod(x, strata, psu, fpc, nPSU,certainty=NULL, postStrata=NULL,
+ lonely.psu=getOption("survey.lonely.psu"))
+onestage(x, strata, clusters, nPSU, fpc,
+ lonely.psu=getOption("survey.lonely.psu"),stage=0,cal)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{x}{A vector or matrix}
+ \item{strata}{A vector of stratum indicators (may be \code{NULL} for \code{svyCprod})}
+ \item{psu}{A vector of cluster indicators (may be \code{NULL})}
+ \item{clusters}{A vector of cluster indicators }
+ \item{fpc}{A data frame (\code{svyCprod}) or vector (\code{onestage})
+ of population stratum sizes, or \code{NULL}}
+  \item{nPSU}{Table (\code{svyCprod}) or vector (\code{onestage})
+ of original sample stratum sizes (or \code{NULL})}
+ \item{certainty}{logical vector with stratum names as names. If
+ \code{TRUE} and that stratum has a single PSU it is a certainty PSU}
+ \item{postStrata}{Post-stratification variables}
+ \item{lonely.psu}{One of \code{"remove"}, \code{"adjust"},
+ \code{"fail"}, \code{"certainty"}, \code{"average"}. See Details
+ below}
+ \item{stage}{Used internally to track the depth of recursion}
+ \item{cal}{Used to pass calibration information at stages below the population}
+}
+\details{
+ The observations for each cluster are added, then centered within each
+ stratum and the outer product is taken of the row vector resulting for
+ each cluster. This is added within strata, multiplied by a
+ degrees-of-freedom correction and by a finite population correction (if
+ supplied) and added across strata.
+
+ If there are fewer clusters (PSUs) in a stratum than in the original
+  design, extra rows of zeroes are added to \code{x} to allow the correct
+ subpopulation variance to be computed.
+
+ See \code{\link{postStratify}} for information about
+ post-stratification adjustments.
+
+The variance formula gives 0/0 if a stratum contains only one sampling
+unit. If the \code{certainty} argument specifies that this is a PSU
+sampled with probability 1 (a "certainty" PSU) then it does not
+contribute to the variance (this is correct only when there is no
+subsampling within the PSU -- otherwise it should be defined as a
+pseudo-stratum). If \code{certainty} is \code{FALSE} for
+this stratum or is not supplied, the result depends on \code{lonely.psu}.
+
+The options are \code{"fail"} to give an error, \code{"remove"} or
+\code{"certainty"} to give a variance contribution of 0 for the stratum,
+\code{"adjust"} to center the stratum at the grand mean rather than the
+stratum mean, and \code{"average"} to assign strata with one PSU the
+average variance contribution from strata with more than one PSU. The
+choice is controlled by setting \code{options(survey.lonely.psu)}. If
+this is not done the factory default is \code{"fail"}. Using
+\code{"adjust"} is conservative, and it would often be better to combine
+strata in some intelligent way. The properties of \code{"average"} have
+not been investigated thoroughly, but it may be useful when the lonely
+PSUs are due to a few strata having PSUs missing completely at random.
+
+The \code{"remove"}and \code{"certainty"} options give the same result,
+but \code{"certainty"} is intended for situations where there is only
+one PSU in the population stratum, which is sampled with certainty (also
+called `self-representing' PSUs or strata). With \code{"certainty"} no
+warning is generated for strata with only one PSU. Ordinarily,
+\code{svydesign} will detect certainty PSUs, making this option
+unnecessary.
+
+For strata with a single PSU in a subset (domain) the variance formula
+gives a value that is well-defined and positive, but not typically
+correct. If \code{options("survey.adjust.domain.lonely")} is \code{TRUE}
+and \code{options("survey.lonely.psu")} is \code{"adjust"} or
+\code{"average"}, and no post-stratification or G-calibration has been
+done, strata with a single PSU in a subset will be treated like those
+with a single PSU in the sample. I am not aware of any theoretical
+study of this procedure, but it should at least be conservative.
+
+}
+\value{
+ A covariance matrix
+}
+\author{Thomas Lumley}
+
+\references{Binder, David A. (1983). On the variances of asymptotically normal estimators from complex surveys. International Statistical Review, 51, 279- 292. }
+
+\seealso{\code{\link{svydesign}}, \code{\link{svyrecvar}}, \code{\link{surveyoptions}}, \code{\link{postStratify}} }
+
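+\examples{
+## a sketch of the one-stage computation (not from the package sources):
+## with a single stratum, weighted cluster totals are centred, their
+## outer product is taken, and the result is scaled by a
+## degrees-of-freedom correction and the finite population correction
+data(api)
+x<-cbind(apiclus1$api00*apiclus1$pw)
+ctot<-rowsum(x, apiclus1$dnum)  ## add observations within each cluster
+n<-nrow(ctot)                   ## 15 of the 757 districts were sampled
+centred<-sweep(ctot, 2, colMeans(ctot))
+(1-n/757)*(n/(n-1))*crossprod(centred)
+## agrees with the design-based variance of the estimated total
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+vcov(svytotal(~api00, dclus1))
+}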
+\keyword{utilities}% at least one, from doc/KEYWORDS
+\keyword{survey}% __ONLY ONE__ keyword per line
diff --git a/man/svyby.Rd b/man/svyby.Rd
new file mode 100755
index 0000000..38231c1
--- /dev/null
+++ b/man/svyby.Rd
@@ -0,0 +1,161 @@
+\name{svyby}
+\alias{svyby}
+\alias{svyby.default}
+\alias{SE.svyby}
+\alias{deff.svyby}
+\alias{coef.svyby}
+\alias{confint.svyby}
+\alias{unwtd.count}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Survey statistics on subsets}
+\description{
+Compute survey statistics on subsets of a survey defined by factors.
+}
+\usage{
+svyby(formula, by ,design,...)
+\method{svyby}{default}(formula, by, design, FUN, ..., deff=FALSE,keep.var = TRUE,
+keep.names = TRUE,verbose=FALSE, vartype=c("se","ci","ci","cv","cvpct","var"),
+ drop.empty.groups=TRUE, covmat=FALSE, return.replicates=FALSE,
+ na.rm.by=FALSE, na.rm.all=FALSE, multicore=getOption("survey.multicore"))
+\method{SE}{svyby}(object,...)
+\method{deff}{svyby}(object,...)
+\method{coef}{svyby}(object,...)
+\method{confint}{svyby}(object, parm, level = 0.95,df =Inf,...)
+unwtd.count(x, design, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula,x}{A formula specifying the variables to pass to
+ \code{FUN} (or a matrix, data frame, or vector)}
+ \item{by}{A formula specifying factors that define subsets, or a list
+ of factors.}
+ \item{design}{A \code{svydesign} or \code{svrepdesign} object}
+ \item{FUN}{A function taking a formula and survey design object as its
+ first two arguments.}
+ \item{\dots}{Other arguments to \code{FUN}}
+ \item{deff}{Request a design effect from \code{FUN}}
+ \item{keep.var}{If \code{FUN} returns a \code{svystat} object, extract
+ standard errors from it}
+\item{keep.names}{Define row names based on the subsets}
+\item{verbose}{If \code{TRUE}, print a label for each subset as it is
+ processed.}
+\item{vartype}{Report variability as one or more of
+ standard error, confidence interval, coefficient of
+ variation, percent coefficient of variation, or variance}
+\item{drop.empty.groups}{If \code{FALSE}, report \code{NA} for empty
+ groups, if \code{TRUE} drop them from the output}
+\item{na.rm.by}{If \code{TRUE}, omit groups defined by \code{NA} values
+  of the \code{by} variables.}
+\item{na.rm.all}{If \code{TRUE}, check for groups with no non-missing
+ observations for variables defined by \code{formula} and treat these groups
+ as empty}
+\item{covmat}{If \code{TRUE}, compute covariances between estimates for
+ different subsets (currently only for replicate-weight
+ designs). Allows \code{\link{svycontrast}} to be used on output.}
+\item{return.replicates}{Only for replicate-weight designs. If
+ \code{TRUE}, return all the replicates as the "replicates" attribute of the result}
+\item{multicore}{Use \code{multicore} package to distribute subsets over
+ multiple processors?}
+ \item{parm}{a specification of which parameters are to be given
+ confidence intervals, either a vector of numbers or a vector of
+ names. If missing, all parameters are considered.}
+ \item{level}{the confidence level required.}
+ \item{df}{degrees of freedom for t-distribution in confidence
+ interval, use \code{degf(design)} for number of PSUs minus number of
+ strata}
+\item{object}{An object of class \code{"svyby"}}
+}
+
+\value{
+ An object of class \code{"svyby"}: a data frame showing the factors and the results of \code{FUN}.
+
+For \code{unwtd.count}, the unweighted number of non-missing observations in the data matrix specified by \code{x} for the design.
+}
+\details{
+The variance type "ci" asks for confidence intervals, which are produced
+ by \code{confint}. In some cases additional options to \code{FUN} will
+ be needed to produce confidence intervals, for example,
+ \code{svyquantile} needs \code{ci=TRUE} or \code{keep.var=FALSE}.
+
+ \code{unwtd.count} is designed to be passed to \code{svyby} to report
+ the number of non-missing observations in each subset. Observations
+ with exactly zero weight will also be counted as missing, since that's
+ how subsets are implemented for some designs.
+
+ Parallel processing with \code{multicore=TRUE} is useful only for
+ fairly large problems and on computers with sufficient memory. The
+ \code{multicore} package is incompatible with some GUIs, although the
+ Mac Aqua GUI appears to be safe.
+
+ }
+
+ \note{The function works by making a lot of calls of the form
+ \code{FUN(formula, subset(design, by==i))}, where \code{formula} is
+ re-evaluated in each subset, so it is unwise to use data-dependent
+  terms in \code{formula}. In particular, \code{svyby(~factor(a), ~b,
+  design=d, svymean)} will create factor variables whose levels are
+  only those values of \code{a} present in each subset. Use
+  \code{\link{update.survey.design}} to add variables to the design
+  object instead, as in the last example below.
+
+
+ }
+
+\note{ Asking for a design effect (\code{deff=TRUE}) from a function
+ that does not produce one will cause an error or incorrect formatting
+ of the output. The same will occur with \code{keep.var=TRUE} if the
+ function does not compute a standard error.
+}
+\seealso{\code{\link{svytable}} and \code{\link{ftable.svystat}} for
+ contingency tables, \code{\link{ftable.svyby}} for pretty-printing of \code{svyby} }
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+svyby(~api99, ~stype, dclus1, svymean)
+svyby(~api99, ~stype, dclus1, svyquantile, quantiles=0.5,ci=TRUE,vartype="ci")
+## without ci=TRUE svyquantile does not compute standard errors
+svyby(~api99, ~stype, dclus1, svyquantile, quantiles=0.5, keep.var=FALSE)
+svyby(~api99, list(school.type=apiclus1$stype), dclus1, svymean)
+svyby(~api99+api00, ~stype, dclus1, svymean, deff=TRUE,vartype="ci")
+svyby(~api99+api00, ~stype+sch.wide, dclus1, svymean, keep.var=FALSE)
+## report raw number of observations
+svyby(~api99+api00, ~stype+sch.wide, dclus1, unwtd.count, keep.var=FALSE)
+
+rclus1<-as.svrepdesign(dclus1)
+
+svyby(~api99, ~stype, rclus1, svymean)
+svyby(~api99, ~stype, rclus1, svyquantile, quantiles=0.5)
+svyby(~api99, list(school.type=apiclus1$stype), rclus1, svymean, vartype="cv")
+svyby(~enroll,~stype, rclus1,svytotal, deff=TRUE)
+svyby(~api99+api00, ~stype+sch.wide, rclus1, svymean, keep.var=FALSE)
+## report raw number of observations
+svyby(~api99+api00, ~stype+sch.wide, rclus1, unwtd.count, keep.var=FALSE)
+
+## comparing subgroups using covmat=TRUE
+mns<-svyby(~api99, ~stype, rclus1, svymean,covmat=TRUE)
+vcov(mns)
+svycontrast(mns, c(E = 1, M = -1))
+
+str(svyby(~api99, ~stype, rclus1, svymean,return.replicates=TRUE))
+
+
+## extractor functions
+(a<-svyby(~enroll, ~stype, rclus1, svytotal, deff=TRUE, verbose=TRUE,
+ vartype=c("se","cv","cvpct","var")))
+deff(a)
+SE(a)
+cv(a)
+coef(a)
+confint(a, df=degf(rclus1))
+
+## ratio estimates
+svyby(~api.stu, by=~stype, denominator=~enroll, design=dclus1, svyratio)
+
+## empty groups
+svyby(~api00,~comp.imp+sch.wide,design=dclus1,svymean)
+svyby(~api00,~comp.imp+sch.wide,design=dclus1,svymean,drop.empty.groups=FALSE)
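+
+## data-dependent terms: add them to the design with update() rather
+## than computing them in the formula (a sketch; see the Note above)
+dclus1<-update(dclus1, apiband=cut(api99, c(0,600,800,1000)))
+svyby(~apiband, ~stype, dclus1, svymean)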
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/svycdf.Rd b/man/svycdf.Rd
new file mode 100644
index 0000000..e1779c4
--- /dev/null
+++ b/man/svycdf.Rd
@@ -0,0 +1,61 @@
+\name{svycdf}
+\alias{svycdf}
+\alias{print.svycdf}
+\alias{plot.svycdf}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Cumulative Distribution Function}
+\description{
+Estimates the population cumulative distribution function for specified
+variables. In contrast to \code{\link{svyquantile}}, this does not do
+any interpolation: the result is a right-continuous step function.
+}
+\usage{
+svycdf(formula, design, na.rm = TRUE,...)
+\method{print}{svycdf}(x,...)
+\method{plot}{svycdf}(x,xlab=NULL,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{one-sided formula giving variables from the design object }
+ \item{design}{survey design object }
+ \item{na.rm}{remove missing data (case-wise deletion)?}
+ \item{...}{other arguments to \code{\link{plot.stepfun}}}
+ \item{x}{object of class \code{svycdf}}
+ \item{xlab}{a vector of x-axis labels or \code{NULL} for the default labels}
+}
+
+\value{
+ An object of class \code{svycdf}, which is a list of step functions (of
+ class \code{\link{stepfun}})
+ }
+
+
+\seealso{ \code{\link{svyquantile}}, \code{\link{svyhist}}, \code{\link{plot.stepfun}}}
+\examples{
+data(api)
+dstrat <- svydesign(id = ~1, strata = ~stype, weights = ~pw, data = apistrat,
+ fpc = ~fpc)
+cdf.est<-svycdf(~enroll+api00+api99, dstrat)
+cdf.est
+## function
+cdf.est[[1]]
+## evaluate the function
+cdf.est[[1]](800)
+cdf.est[[2]](800)
+
+## compare to population and sample CDFs.
+opar<-par(mfrow=c(2,1))
+cdf.pop<-ecdf(apipop$enroll)
+cdf.samp<-ecdf(apistrat$enroll)
+plot(cdf.pop,main="Population vs sample", xlab="Enrollment")
+lines(cdf.samp,col.points="red")
+
+plot(cdf.pop, main="Population vs estimate", xlab="Enrollment")
+lines(cdf.est[[1]],col.points="red")
+
+par(opar)
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{hplot}% __ONLY ONE__ keyword per line
diff --git a/man/svychisq.Rd b/man/svychisq.Rd
new file mode 100755
index 0000000..eca70c3
--- /dev/null
+++ b/man/svychisq.Rd
@@ -0,0 +1,181 @@
+\name{svytable}
+\alias{svreptable}
+\alias{svytable}
+\alias{svytable.svyrep.design}
+\alias{svytable.survey.design}
+\alias{svychisq}
+\alias{svychisq.survey.design}
+\alias{svychisq.svyrep.design}
+\alias{summary.svytable}
+\alias{print.summary.svytable}
+\alias{summary.svreptable}
+\alias{degf}
+\alias{degf.svyrep.design}
+\alias{degf.survey.design2}
+\alias{degf.twophase}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Contingency tables for survey data}
+\description{
+ Contingency tables and chisquared tests of association for survey data.
+}
+\usage{
+\method{svytable}{survey.design}(formula, design, Ntotal = NULL, round = FALSE,...)
+\method{svytable}{svyrep.design}(formula, design, Ntotal = sum(weights(design, "sampling")), round = FALSE,...)
+\method{svychisq}{survey.design}(formula, design,
+ statistic = c("F", "Chisq","Wald","adjWald","lincom","saddlepoint"),na.rm=TRUE,...)
+\method{svychisq}{svyrep.design}(formula, design,
+ statistic = c("F", "Chisq","Wald","adjWald","lincom","saddlepoint"),na.rm=TRUE,...)
+\method{summary}{svytable}(object,
+ statistic = c("F","Chisq","Wald","adjWald","lincom","saddlepoint"),...)
+degf(design, ...)
+\method{degf}{survey.design2}(design, ...)
+\method{degf}{svyrep.design}(design, tol=1e-5,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{Model formula specifying margins for the table (using \code{+} only)}
+ \item{design}{survey object}
+ \item{statistic}{See Details below}
+ \item{Ntotal}{A population total or set of population stratum totals
+ to normalise to.}
+ \item{round}{Should the table entries be rounded to the nearest
+ integer?}
+ \item{na.rm}{Remove missing values}
+ \item{object}{Output from \code{svytable}}
+ \item{...}{For \code{svytable} these are passed to \code{xtabs}. Use
+ \code{exclude=NULL}, \code{na.action=na.pass} to include \code{NA}s
+ in the table}
+ \item{tol}{Tolerance for \code{\link{qr}} in computing the matrix rank}
+ }
+\details{
+
+The \code{svytable} function computes a weighted crosstabulation. This
+is especially useful for producing graphics. It is sometimes easier
+to use \code{\link{svytotal}} or \code{\link{svymean}}, which also
+produce standard errors, design effects, etc.
+
+The frequencies in the table can be normalised to some convenient total
+such as 100 or 1.0 by specifying the \code{Ntotal} argument. If the
+formula has a left-hand side the mean or sum of this variable rather
+than the frequency is tabulated.
+
+The \code{Ntotal} argument can be either a single number or a data
+frame whose first column gives the (first-stage) sampling strata and
+second column the population size in each stratum. In this second case
+the \code{svytable} command performs `post-stratification': tabulating
+and scaling to the population within strata and then adding up the
+strata.
+
+As with other \code{xtabs} objects, the output of \code{svytable} can be
+processed by \code{ftable} for more attractive display. The
+\code{summary} method for \code{svytable} objects calls \code{svychisq}
+for a test of independence.
+
+\code{svychisq} computes first and second-order Rao-Scott corrections to
+the Pearson chisquared test, and two Wald-type tests.
+
+The default (\code{statistic="F"}) is the Rao-Scott second-order
+correction. The p-values are computed with a Satterthwaite
+approximation to the distribution and with denominator degrees of
+freedom as recommended by Thomas and Rao (1990). The alternative
+\code{statistic="Chisq"} adjusts the Pearson chisquared statistic by a
+design effect estimate and then compares it to the chisquared
+distribution it would have under simple random sampling.
+
+The \code{statistic="Wald"} test is that proposed by Koch et al (1975)
+and used by the SUDAAN software package. It is a Wald test based on the
+differences between the observed cells counts and those expected under
+independence. The adjustment given by \code{statistic="adjWald"} reduces
+the statistic when the number of PSUs is small compared to the number of
+degrees of freedom of the test. Thomas and Rao (1990) compare these
+tests and find the adjustment beneficial.
+
+\code{statistic="lincom"} replaces the numerator of the Rao-Scott F with
+the exact asymptotic distribution, which is a linear combination of
+chi-squared variables (see \code{\link{pchisqsum}}), and
+\code{statistic="saddlepoint"} uses a saddlepoint approximation to this
+distribution. The \code{CompQuadForm} package is needed for
+\code{statistic="lincom"} but not for
+\code{statistic="saddlepoint"}. The saddlepoint approximation is
+especially useful when the p-value is very small (as in large-scale
+multiple testing problems).
+
+For designs using replicate weights the code is essentially the same as
+for designs with sampling structure, since the necessary variance
+computations are done by the appropriate methods of
+\code{\link{svytotal}} and \code{\link{svymean}}. The exception is that
+the degrees of freedom are computed as one less than the rank of the
+matrix of replicate weights (by \code{degf}).
+
+
+At the moment, \code{svychisq} works only for 2-dimensional tables.
+}
+\value{
+ The table commands return an \code{xtabs} object, \code{svychisq}
+ returns a \code{htest} object.
+}
+\references{
+Davies RB (1973). "Numerical inversion of a characteristic function"
+Biometrika 60:415-7
+
+P. Duchesne, P. Lafaye de Micheaux (2010) "Computing the distribution of
+quadratic forms: Further comparisons between the Liu-Tang-Zhang
+approximation and exact methods", Computational Statistics and Data
+Analysis, Volume 54, 858-862
+
+Koch, GG, Freeman, DH, Freeman, JL (1975) "Strategies in the
+multivariate analysis of data from complex surveys" International
+Statistical Review 43: 59-78
+
+Rao, JNK, Scott, AJ (1984) "On Chi-squared Tests For Multiway
+Contingency Tables with Proportions Estimated From Survey Data" Annals
+of Statistics 12:46-60.
+
+Sribney WM (1998) "Two-way contingency tables for survey or clustered
+data" Stata Technical Bulletin 45:33-49.
+
+Thomas, DR, Rao, JNK (1990) "Small-sample comparison of level and power
+for simple goodness-of-fit statistics under cluster sampling" JASA 82:630-636
+}
+
+\note{Rao and Scott (1984) leave open one computational issue. In
+ computing `generalised design effects' for these tests, should the
+ variance under simple random sampling be estimated using the observed
+  proportions or the predicted proportions under the null
+  hypothesis? \code{svychisq} uses the observed proportions, following
+  simulations by Sribney (1998), and the choices made in Stata.}
+
+
+\seealso{\code{\link{svytotal}} and \code{\link{svymean}} report totals
+ and proportions by category for factor variables.
+
+ See \code{\link{svyby}} and \code{\link{ftable.svystat}} to construct
+ more complex tables of summary statistics.
+
+ See \code{\link{svyloglin}} for loglinear models.
+
+ See \code{\link{regTermTest}} for Rao-Scott tests in regression models.
+}
+\examples{
+ data(api)
+ xtabs(~sch.wide+stype, data=apipop)
+
+ dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+ summary(dclus1)
+
+ (tbl <- svytable(~sch.wide+stype, dclus1))
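+ ## normalising to a convenient total (a quick sketch): percentages
+ round(svytable(~sch.wide+stype, dclus1, Ntotal=100), 1)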
+ plot(tbl)
+ fourfoldplot(svytable(~sch.wide+comp.imp+stype,design=dclus1,round=TRUE), conf.level=0)
+
+ svychisq(~sch.wide+stype, dclus1)
+ summary(tbl, statistic="Chisq")
+ svychisq(~sch.wide+stype, dclus1, statistic="adjWald")
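+ ## the saddlepoint approximation, useful for very small p-values
+ svychisq(~sch.wide+stype, dclus1, statistic="saddlepoint")
+ ## design degrees of freedom used by the default F test
+ degf(dclus1)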
+
+ rclus1 <- as.svrepdesign(dclus1)
+ summary(svytable(~sch.wide+stype, rclus1))
+ svychisq(~sch.wide+stype, rclus1, statistic="adjWald")
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{category}% __ONLY ONE__ keyword per line
+\keyword{htest}% __ONLY ONE__ keyword per line
diff --git a/man/svyciprop.Rd b/man/svyciprop.Rd
new file mode 100644
index 0000000..53ef782
--- /dev/null
+++ b/man/svyciprop.Rd
@@ -0,0 +1,103 @@
+\name{svyciprop}
+\alias{svyciprop}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Confidence intervals for proportions }
+\description{
+Computes confidence intervals for proportions using methods that may be
+more accurate near 0 and 1 than simply using \code{confint(svymean())}.
+}
+\usage{
+svyciprop(formula, design, method = c("logit", "likelihood", "asin", "beta",
+"mean","xlogit"), level = 0.95, df=degf(design),...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{Model formula specifying a single binary variable}
+ \item{design}{ survey design object}
+ \item{method}{ See Details below. Partial matching is done on the argument.}
+ \item{level}{Confidence level for interval}
+ \item{df}{denominator degrees of freedom, for all methods except
+ \code{"beta"}. Use \code{Inf} for confidence intervals based on a
+ Normal distribution, and for \code{"likelihood"} and \code{"logit"}
+ use \code{NULL} for the default method in glms (currently
+ \code{degf(design)-1}, but this may be improved in the future)}
+ \item{\dots}{For \code{"mean"} and \code{"asin"}, this is passed to \code{\link{confint.svystat}} }
+}
+\details{
+The \code{"logit"} method fits a logistic regression model and computes a
+Wald-type interval on the log-odds scale, which is then transformed to
+the probability scale.
+
+The \code{"likelihood"} method uses the (Rao-Scott) scaled chi-squared distribution
+for the loglikelihood from a binomial distribution.
+
+The \code{"asin"} method uses the variance-stabilising transformation
+for the binomial distribution, the arcsine square root, and then
+back-transforms the interval to the probability scale.
+
+The \code{"beta"} method uses the incomplete beta function as in
+\code{\link{binom.test}}, with an effective sample size based on the
+estimated variance of the proportion (Korn and Graubard, 1998).
+
+The \code{"xlogit"} method uses a logit transformation of the mean and
+then back-transforms to the probability scale. This appears to be the
+method used by SUDAAN and SPSS COMPLEX SAMPLES.
+
+The \code{"mean"} method is a Wald-type interval on the probability
+scale, the same as \code{confint(svymean())}.
+
+All methods undercover for probabilities close enough to zero or one,
+but \code{"beta"}, \code{"likelihood"}, \code{"logit"}, and \code{"logit"} are noticeably
+better than the other two. None of the methods will work when the
+observed proportion is exactly 0 or 1.
+
+The \code{confint} method extracts the confidence interval; the
+\code{vcov} and \code{SE} methods just report the variance or standard
+error of the mean.
+}
+\value{
+ The point estimate of the proportion, with the confidence interval as
+ an attribute
+}
+\references{
+Rao, JNK, Scott, AJ (1984) "On Chi-squared Tests For Multiway
+Contingency Tables with Proportions Estimated From Survey Data" Annals
+of Statistics 12:46-60.
+
+Korn EL, Graubard BI. (1998) Confidence Intervals For Proportions With
+ Small Expected Number of Positive Counts Estimated From Survey
+ Data. Survey Methodology 23:193-201.
+
+}
+
+\seealso{ \code{\link{svymean}}, \code{\link{yrbs}} }
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, fpc=~fpc, data=apiclus1)
+
+svyciprop(~I(ell==0), dclus1, method="li")
+svyciprop(~I(ell==0), dclus1, method="lo")
+svyciprop(~I(ell==0), dclus1, method="as")
+svyciprop(~I(ell==0), dclus1, method="be")
+svyciprop(~I(ell==0), dclus1, method="me")
+svyciprop(~I(ell==0), dclus1, method="xl")
+
+## reproduces Stata svy: mean
+svyciprop(~I(ell==0), dclus1, method="me", df=degf(dclus1))
+## reproduces Stata svy: prop
+svyciprop(~I(ell==0), dclus1, method="lo", df=degf(dclus1))
+
+
+rclus1<-as.svrepdesign(dclus1)
+svyciprop(~I(emer==0), rclus1, method="li")
+svyciprop(~I(emer==0), rclus1, method="lo")
+svyciprop(~I(emer==0), rclus1, method="as")
+svyciprop(~I(emer==0), rclus1, method="be")
+svyciprop(~I(emer==0), rclus1, method="me")
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/svycontrast.Rd b/man/svycontrast.Rd
new file mode 100644
index 0000000..2ed1b4d
--- /dev/null
+++ b/man/svycontrast.Rd
@@ -0,0 +1,60 @@
+\name{svycontrast}
+\alias{svycontrast}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Linear and nonlinear contrasts of survey statistics }
+\description{
+Computes linear or nonlinear contrasts of estimates produced by survey
+functions (or any object with \code{coef} and \code{vcov} methods).
+}
+\usage{
+svycontrast(stat, contrasts, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{stat}{object of class \code{svrepstat} or \code{svystat} }
+ \item{contrasts}{A vector or list of vectors of coefficients, or a
+ call or list of calls }
+ \item{\dots}{For future expansion}
+}
+\value{
+Object of class \code{svrepstat} or \code{svystat}
+}
+
+\details{
+If \code{contrasts} is a list, the element names are used as
+names for the returned statistics.
+
+If an element of \code{contrasts} is shorter than \code{coef(stat)} and has names, the
+names are used to match up the vectors and the remaining elements of
+\code{contrasts} are assumed to be zero. If the names are not legal
+variable names (eg \code{0.1}) they must be quoted (eg \code{"0.1"}).
+
+If \code{contrasts} is a \code{"call"} or list of \code{"call"s}, the
+delta-method is used to compute variances, and the calls must use only
+functions that \code{\link{deriv}} knows how to differentiate. If the
+names are not legal variable names they must be quoted with backticks
+(eg \code{`0.1`}).
+ }
+\seealso{\code{\link{regTermTest}}, \code{svyglm}}
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+a <- svytotal(~api00+enroll+api99, dclus1)
+svycontrast(a, list(avg=c(0.5,0,0.5), diff=c(1,0,-1)))
+## if contrast vectors have names, zeroes may be omitted
+svycontrast(a, list(avg=c(api00=0.5,api99=0.5), diff=c(api00=1,api99=-1)))
+
+## nonlinear contrasts
+svycontrast(a, quote(api00/api99))
+svyratio(~api00, ~api99, dclus1)
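+## delta-method for a log ratio (a sketch; deriv() can handle log)
+svycontrast(a, quote(log(api00/api99)))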
+
+## Example: standardised skewness coefficient
+moments<-svymean(~I(api00^3)+I(api00^2)+I(api00), dclus1)
+svycontrast(moments,
+quote((`I(api00^3)`-3*`I(api00^2)`*`I(api00)`+ 3*`I(api00)`*`I(api00)`^2-`I(api00)`^3)/
+ (`I(api00^2)`-`I(api00)`^2)^1.5))
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+
diff --git a/man/svycoplot.Rd b/man/svycoplot.Rd
new file mode 100644
index 0000000..00df281
--- /dev/null
+++ b/man/svycoplot.Rd
@@ -0,0 +1,53 @@
+\name{svycoplot}
+\alias{svycoplot}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Conditioning plots of survey data }
+\description{
+Draws conditioned scatterplots ('Trellis' plots) of survey data using
+hexagonal binning or transparency.
+}
+\usage{
+svycoplot(formula, design, style = c("hexbin", "transparent"), basecol =
+"black", alpha = c(0, 0.8),hexscale=c("relative","absolute"), ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{A graph formula suitable for \code{\link{xyplot}}}
+ \item{design}{A survey design object }
+ \item{style}{Hexagonal binning or transparent color?}
+ \item{basecol}{The fully opaque 'base' color for creating transparent
+ colors. This may also be a function; see \code{\link{svyplot}} for details}
+ \item{alpha}{Minimum and maximum opacity }
+  \item{hexscale}{Scale hexagons separately for each panel (relative) or
+ across all panels (absolute)}
+ \item{\dots}{Other arguments passed to \code{grid.hexagons} or \code{\link{xyplot}} }
+}
+
+\value{
+ An object of class \code{trellis}
+}
+\note{
+As with all 'Trellis' graphs, this function creates an object but does
+ not draw the graph. When used inside a function or non-interactively
+ you need to \code{print()} the result to create the graph.
+}
+\seealso{\code{\link{svyplot}}}
+\examples{
+data(api)
+dclus2<-svydesign(id=~dnum+snum, weights=~pw,
+ data=apiclus2, fpc=~fpc1+fpc2)
+
+svycoplot(api00~api99|sch.wide*comp.imp, design=dclus2, style="hexbin")
+svycoplot(api00~api99|sch.wide*comp.imp, design=dclus2, style="hexbin", hexscale="absolute")
+
+svycoplot(api00~api99|sch.wide, design=dclus2, style="trans")
+
+svycoplot(api00~meals|stype,design=dclus2,
+ style="transparent",
+ basecol=function(d) c("darkred","purple","forestgreen")[as.numeric(d$stype)],
+ alpha=c(0,1))
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{hplot}% __ONLY ONE__ keyword per line
diff --git a/man/svycoxph.Rd b/man/svycoxph.Rd
new file mode 100755
index 0000000..0c00a7f
--- /dev/null
+++ b/man/svycoxph.Rd
@@ -0,0 +1,117 @@
+\name{svycoxph}
+\alias{svycoxph}
+\alias{svycoxph.survey.design2}
+\alias{svycoxph.survey.design}
+\alias{svycoxph.svyrep.design}
+\alias{predict.svycoxph}
+%\alias{print.svycoxph}
+%\alias{model.frame.svycoxph}
+%\alias{summary.svycoxph}
+%\alias{anova.svycoxph}
+%\alias{extractAIC.svycoxph}
+%\alias{survfit.svycoxph}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Survey-weighted Cox models.}
+\description{
+Fit a proportional hazards model to data from a complex survey design.
+}
+\usage{
+svycoxph(formula, design,subset=NULL, ...)
+\method{predict}{svycoxph}(object, newdata, se=FALSE,
+ type=c("lp", "risk", "expected", "terms","curve"),...)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{formula}{Model formula. Any \code{cluster()} terms will be ignored.}
+ \item{design}{ \code{survey.design} object. Must contain all variables
+ in the formula}
+ \item{subset}{Expression to select a subpopulation}
+ \item{object}{A \code{svycoxph} object}
+ \item{newdata}{New data for prediction}
+ \item{se}{Compute standard errors? This takes a lot of memory for
+ \code{type="curve"}}
+ \item{type}{"curve" does predicted survival curves. The other values
+ are passed to \code{predict.coxph()}}
+ \item{\dots}{Other arguments passed to \code{coxph}. }
+}
+\details{
+The main difference between the \code{svycoxph} function and the \code{robust=TRUE}
+option to \code{\link{coxph}} in the
+survival package is that this function accounts for the reduction in
+variance from stratified sampling and the increase in variance from
+having only a small number of clusters.
+
+Note that \code{strata} terms in the model formula describe subsets that
+have a separate baseline hazard function and need not have anything to
+do with the stratification of the sampling.
+
+The standard errors for predicted survival curves are available only by linearization, not
+ by replicate weights (at the moment). Use
+ \code{\link{withReplicates}} to get standard errors with replicate
+ weights. Predicted survival curves are not available for stratified
+ Cox models.
+
+The standard errors use the delta-method approach of Williams (1995)
+ for the Nelson-Aalen estimator, modified to handle the Cox model
+ following Tsiatis (1981). The standard errors agree closely with
+ \code{survfit.coxph} for independent sampling when the model fits
+ well, but are larger when the model fits poorly. I believe the
+ standard errors are equivalent to those of Lin (2000), but I don't
+ know of any implementation that would allow a check.
+ }
+\value{
+ An object of class \code{svycoxph} for \code{svycoxph}, an object of
+ class \code{svykm} or \code{svykmlist} for \code{predict(,type="curve")}.
+}
+\section{Warning}{
+The standard error calculation for survival curves uses memory
+ proportional to the sample size times the square of the number of events.
+ }
+\author{Thomas Lumley}
+
+\references{Binder DA. (1992) Fitting Cox's proportional hazards models
+ from survey data. Biometrika 79: 139-147
+
+Lin D-Y (2000) On fitting Cox's proportional hazards model to survey data. Biometrika 87: 37-47
+
+Tsiatis AA (1981) A Large Sample Study of Cox's Regression Model. Annals
+of Statistics 9(1) 93-108
+
+Williams RL (1995) "Product-Limit Survival Functions with Correlated
+Survival Times" Lifetime Data Analysis 1: 171--186
+
+}
+\seealso{ \code{\link{coxph}}, \code{\link{predict.coxph}}
+
+ \code{\link{svykm}} for estimation of Kaplan-Meier survival curves and
+ for methods that operate on survival curves.
+
+\code{\link{regTermTest}} for Wald and (Rao-Scott) likelihood ratio tests for one or more parameters.
+}
+
+\examples{
+## Somewhat unrealistic example of nonresponse bias.
+data(pbc, package="survival")
+
+pbc$randomized<-with(pbc, !is.na(trt) & trt>0)
+biasmodel<-glm(randomized~age*edema,data=pbc,family=binomial)
+pbc$randprob<-fitted(biasmodel)
+if (is.null(pbc$albumin)) pbc$albumin<-pbc$alb ##pre2.9.0
+
+dpbc<-svydesign(id=~1, prob=~randprob, strata=~edema, data=subset(pbc,randomized))
+rpbc<-as.svrepdesign(dpbc)
+
+(model<-svycoxph(Surv(time,status>0)~log(bili)+protime+albumin,design=dpbc))
+
+svycoxph(Surv(time,status>0)~log(bili)+protime+albumin,design=rpbc)
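+
+## Wald test of a single coefficient (a sketch)
+regTermTest(model, ~protime)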
+
+s<-predict(model,se=TRUE, type="curve",
+ newdata=data.frame(bili=c(3,9), protime=c(10,10), albumin=c(3.5,3.5)))
+plot(s[[1]],ci=TRUE,col="sienna")
+lines(s[[2]], ci=TRUE,col="royalblue")
+quantile(s[[1]], ci=TRUE)
+confint(s[[2]], parm=365*(1:5))
+}
+\keyword{regression}% at least one, from doc/KEYWORDS
+\keyword{survival}% at least one, from doc/KEYWORDS
+\keyword{survey}% at least one, from doc/KEYWORDS
diff --git a/man/svydesign.Rd b/man/svydesign.Rd
new file mode 100755
index 0000000..37ef6f6
--- /dev/null
+++ b/man/svydesign.Rd
@@ -0,0 +1,212 @@
+\name{svydesign}
+\alias{svydesign}
+\alias{svydesign.default}
+\alias{svydesign.imputationList}
+\alias{svydesign.character}
+\alias{na.omit.survey.design}
+\alias{na.exclude.survey.design}
+\alias{na.fail.survey.design}
+
+\title{Survey sample analysis.}
+\description{
+ Specify a complex survey design.
+}
+\usage{
+svydesign(ids, probs=NULL, strata = NULL, variables = NULL, fpc=NULL,
+data = NULL, nest = FALSE, check.strata = !nest, weights=NULL,pps=FALSE,...)
+\method{svydesign}{default}(ids, probs=NULL, strata = NULL, variables = NULL,
+ fpc=NULL,data = NULL, nest = FALSE, check.strata = !nest, weights=NULL,
+ pps=FALSE,variance=c("HT","YG"),...)
+\method{svydesign}{imputationList}(ids, probs = NULL, strata = NULL, variables = NULL,
+ fpc = NULL, data, nest = FALSE, check.strata = !nest, weights = NULL, pps=FALSE,
+ ...)
+\method{svydesign}{character}(ids, probs = NULL, strata = NULL, variables = NULL,
+ fpc = NULL, data, nest = FALSE, check.strata = !nest, weights = NULL, pps=FALSE,
+ dbtype = "SQLite", dbname, ...)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{ids}{Formula or data frame specifying cluster ids from largest
+ level to smallest level, \code{~0} or \code{~1} is a formula for no clusters.}
+ \item{probs}{Formula or data frame specifying cluster sampling probabilities}
+ \item{strata}{Formula or vector specifying strata, use \code{NULL} for no strata}
+ \item{variables}{Formula or data frame specifying the variables
+ measured in the survey. If \code{NULL}, the \code{data} argument is
+ used.}
+ \item{fpc}{Finite population correction: see Details below}
+ \item{weights}{Formula or vector specifying sampling weights as an
+    alternative to \code{probs}}
+ \item{data}{Data frame to look up variables in the formula
+ arguments, or database table name, or \code{imputationList} object, see below}
+ \item{nest}{If \code{TRUE}, relabel cluster ids to enforce nesting
+ within strata}
+ \item{check.strata}{If \code{TRUE}, check that clusters are nested in
+    strata.}
+ \item{pps}{\code{"brewer"} to use Brewer's approximation for PPS
+ sampling without replacement. \code{"overton"} to use
+ Overton's approximation. An object of class \code{\link{HR}} to use the Hartley-Rao approximation. An
+ object of class \code{\link{ppsmat}} to use the Horvitz-Thompson estimator.}
+ \item{dbtype}{name of database driver to pass to \code{dbDriver}}
+ \item{dbname}{name of database (eg file name for SQLite)}
+ \item{variance}{For \code{pps} without replacement, use \code{variance="YG"} for the Yates-Grundy estimator instead of the Horvitz-Thompson estimator}
+ \item{\dots}{for future expansion}
+}
+\details{
+ The \code{svydesign} object combines a data frame and all the survey
+ design information needed to analyse it. These objects are used by
+ the survey modelling and summary functions. The
+ \code{id} argument is always required, the \code{strata},
+ \code{fpc}, \code{weights} and \code{probs} arguments are
+ optional. If these variables are specified they must not have any
+ missing values.
+
+ By default, \code{svydesign} assumes that all PSUs, even those in
+ different strata, have a unique value of the \code{id}
+ variable. This allows some data errors to be detected. If your PSUs
+ reuse the same identifiers across strata then set \code{nest=TRUE}.
+
+
+ The finite population correction (fpc) is used to reduce the variance when
+ a substantial fraction of the total population of interest has been
+ sampled. It may not be appropriate if the target of inference is the
+ process generating the data rather than the statistics of a
+ particular finite population.
+
+ The finite population correction can be specified either as the total
+ population size in each stratum or as the fraction of the total
+ population that has been sampled. In either case the relevant
+  population size is the number of sampling units. That is, sampling 100 units
+ from a population stratum of size 500 can be specified as 500 or as
+ 100/500=0.2. The exception is for PPS sampling without replacement, where the
+ sampling probability (which will be different for each PSU) must be used.
+
+ If population sizes are specified but not sampling probabilities or
+ weights, the sampling probabilities will be computed from the
+ population sizes assuming simple random sampling within strata.
+
+ For multistage sampling the \code{id} argument should specify a
+ formula with the cluster identifiers at each stage. If subsequent
+ stages are stratified \code{strata} should also be specified as a
+ formula with stratum identifiers at each stage. The population size
+ for each level of sampling should also be specified in \code{fpc}.
+ If \code{fpc} is not specified then sampling is assumed to be with
+  replacement at the top level and only the first stage of clustering is
+ used in computing variances. If \code{fpc} is specified but for fewer
+ stages than \code{id}, sampling is assumed to be complete for
+ subsequent stages. The variance calculations for
+ multistage sampling assume simple or stratified random sampling
+ within clusters at each stage except possibly the last.
+
+ For PPS sampling without replacement it is necessary to specify the
+ probabilities for each stage of sampling using the \code{fpc}
+ arguments, and an overall \code{weight} argument should not be
+ given. At the moment, multistage or stratified PPS sampling without
+ replacement is supported only with \code{pps="brewer"}, or by
+ giving the full joint probability matrix using
+ \code{\link{ppsmat}}. [Cluster sampling is supported by all
+ methods, but not subsampling within clusters].
+
+ The \code{dim}, \code{"["}, \code{"[<-"} and na.action methods for
+  \code{survey.design} objects operate on the data frame specified by
+ \code{variables} and ensure that the design information is properly
+ updated to correspond to the new data frame. With the \code{"[<-"}
+ method the new value can be a \code{survey.design} object instead of a
+ data frame, but only the data frame is used. See also
+ \code{\link{subset.survey.design}} for a simple way to select
+ subpopulations.
+
+The \code{model.frame} method extracts the observed data.
+
+
+ If the strata with only one PSU are not self-representing (or they are,
+but \code{svydesign} cannot tell based on \code{fpc}) then the handling
+of these strata for variance computation is determined by
+\code{options("survey.lonely.psu")}. See \code{\link{svyCprod}} for
+details.
+
+\code{data} may be a character string giving the name of a table or view
+in a relational database that can be accessed through the \code{DBI} or \code{ODBC}
+interfaces. For DBI interfaces \code{dbtype} should be the name of the database
+driver and \code{dbname} should be the name by which the driver identifies
+the specific database (eg file name for SQLite). For ODBC databases
+\code{dbtype} should be \code{"ODBC"} and \code{dbname} should be the
+registered DSN for the database. On the Windows GUI, \code{dbname=""} will
+produce a dialog box for interactive selection.
+
+The appropriate database interface package must already be loaded (eg
+\code{RSQLite} for SQLite, \code{RODBC} for ODBC). The survey design
+object will contain only the design meta-data, and actual variables will
+be loaded from the database as needed. Use
+\code{\link[=close.DBIsvydesign]{close}} to close the database connection and
+\code{\link[=open.DBIsvydesign]{open}} to reopen the connection, eg, after
+loading a saved object.
+
+The database interface does not attempt to modify the underlying
+database and so can be used with read-only permissions on the database.
+
+
+If \code{data} is an \code{imputationList} object (from the "mitools"
+package), \code{svydesign} will return a \code{svyimputationList} object
+containing a set of designs. Use \code{\link{with.svyimputationList}} to
+do analyses on these designs and \code{MIcombine} to combine the results.
+
+}
+
+\value{
+An object of class \code{survey.design}.
+}
+\author{Thomas Lumley}
+
+
+\seealso{
+ \code{\link{as.svrepdesign}} for converting to replicate weight designs,
+ \code{\link{subset.survey.design}} for domain estimates,
+ \code{\link{update.survey.design}} to add variables.
+
+ \code{mitools} package for using multiple imputations
+
+ \code{\link{svyrecvar}} and \code{\link{svyCprod}} for details of
+ variance estimation
+
+ \code{\link{election}} for examples of PPS sampling without replacement.
+
+ \url{http://faculty.washington.edu/tlumley/survey/} for examples of
+ database-backed objects.
+}
+
+
+
+\examples{
+ data(api)
+# stratified sample
+dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+# one-stage cluster sample
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
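+# the fpc may equivalently be given as a sampling fraction
+# (a sketch: 15 of the 757 districts were sampled)
+dclus1f<-svydesign(id=~dnum, weights=~pw, fpc=~fpcfrac,
+ data=transform(apiclus1, fpcfrac=15/757))
+all.equal(SE(svytotal(~enroll, dclus1f)), SE(svytotal(~enroll, dclus1)))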
+# two-stage cluster sample: weights computed from population sizes.
+dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)
+
+## multistage sampling has no effect when fpc is not given, so
+## these are equivalent.
+dclus2wr<-svydesign(id=~dnum+snum, weights=weights(dclus2), data=apiclus2)
+dclus2wr2<-svydesign(id=~dnum, weights=weights(dclus2), data=apiclus2)
+
+## syntax for stratified cluster sample
+##(though the data weren't really sampled this way)
+svydesign(id=~dnum, strata=~stype, weights=~pw, data=apistrat,
+nest=TRUE)
+
+## PPS sampling without replacement
+data(election)
+dpps<- svydesign(id=~1, fpc=~p, data=election_pps, pps="brewer")
+
+##database example: requires RSQLite
+\dontrun{
+library(RSQLite)
+dbclus1<-svydesign(id=~dnum, weights=~pw, fpc=~fpc,
+data="apiclus1",dbtype="SQLite", dbname=system.file("api.db",package="survey"))
+
+}
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{univar}% __ONLY ONE__ keyword per line
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/svyfactanal.Rd b/man/svyfactanal.Rd
new file mode 100644
index 0000000..d49ff8b
--- /dev/null
+++ b/man/svyfactanal.Rd
@@ -0,0 +1,84 @@
+\name{svyfactanal}
+\Rdversion{1.1}
+\alias{svyfactanal}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Factor analysis in complex surveys (experimental).
+}
+\description{
+This function fits a factor analysis model or SEM by maximum weighted likelihood.
+}
+\usage{
+svyfactanal(formula, design, factors,
+ n = c("none", "sample", "degf","effective", "min.effective"), ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{
+Model formula specifying the variables to use
+}
+ \item{design}{
+Survey design object
+}
+ \item{factors}{
+Number of factors to estimate
+}
+ \item{n}{
+Sample size to be used for testing: see below}
+ \item{\dots}{
+Other arguments to pass to \code{\link{factanal}}.
+}
+}
+
+\details{
+The population covariance matrix is estimated by \code{\link{svyvar}}
+and passed to \code{\link{factanal}}
+
+Although fitting these models requires only the estimated covariance
+matrix, inference requires a sample size. With \code{n="sample"}, the sample size is taken to be
+the number of observations; with \code{n="degf"}, the survey degrees of
+freedom as returned by \code{\link{degf}}. Using \code{"sample"}
+corresponds to standardizing weights to have mean 1, and is known to
+result in anti-conservative tests.
+
+The other two methods estimate an effective sample size for each
+variable: the sample size at which the standard error of the sample
+variance under Normal sampling would match the design-based standard error
+estimated by \code{\link{svyvar}}. With \code{n="min.effective"} the
+minimum sample size across the variables is used; with
+\code{n="effective"} the harmonic mean is used. For \code{svyfactanal}
+the test of model adequacy is optional, and the default choice,
+\code{n="none"}, does not do the test.
+
+}
+\value{
+An object of class \code{factanal}
+}
+
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+\code{\link{factanal}}
+
+The \code{lavaan.survey} package fits structural equation models to complex samples using similar techniques.
+}
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+svyfactanal(~api99+api00+hsg+meals+ell+emer, design=dclus1, factors=2)
+
+svyfactanal(~api99+api00+hsg+meals+ell+emer, design=dclus1, factors=2, n="effective")
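+## the most conservative effective-sample-size choice (illustrative,
+## not in the original examples)
+svyfactanal(~api99+api00+hsg+meals+ell+emer, design=dclus1, factors=2, n="min.effective")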
+
+## Population data for comparison
+factanal(~api99+api00+hsg+meals+ell+emer, data=apipop, factors=2)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{multivariate}% __ONLY ONE__ keyword per line
diff --git a/man/svyglm.Rd b/man/svyglm.Rd
new file mode 100755
index 0000000..5c2d605
--- /dev/null
+++ b/man/svyglm.Rd
@@ -0,0 +1,157 @@
+\name{svyglm}
+\alias{svyglm}
+\alias{svyglm.survey.design}
+\alias{svyglm.svyrep.design}
+\alias{summary.svyglm}
+\alias{summary.svrepglm}
+\alias{vcov.svyglm}
+\alias{residuals.svyglm}
+\alias{residuals.svrepglm}
+\alias{predict.svyglm}
+\alias{predict.svrepglm}
+\alias{coef.svyglm}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Survey-weighted generalised linear models.}
+\description{
+Fit a generalised linear model to data from a complex survey design,
+with inverse-probability weighting and design-based standard errors.
+}
+\usage{
+\method{svyglm}{survey.design}(formula, design, subset=NULL, ...)
+\method{svyglm}{svyrep.design}(formula, design, subset=NULL, ..., rho=NULL,
+return.replicates=FALSE, na.action,multicore=getOption("survey.multicore"))
+\method{summary}{svyglm}(object, correlation = FALSE, df.resid=NULL,
+...)
+\method{predict}{svyglm}(object,newdata=NULL,total=NULL,
+ type=c("link","response","terms"),
+ se.fit=(type != "terms"),vcov=FALSE,...)
+\method{predict}{svrepglm}(object,newdata=NULL,total=NULL,
+ type=c("link","response","terms"),
+ se.fit=(type != "terms"),vcov=FALSE,
+ return.replicates=!is.null(object$replicates),...)
+
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{formula}{Model formula}
+ \item{design}{Survey design from \code{\link{svydesign}} or \code{\link{svrepdesign}}. Must contain all variables
+ in the formula}
+ \item{subset}{Expression to select a subpopulation}
+ \item{\dots}{Other arguments passed to \code{glm} or
+ \code{summary.glm} }
+ \item{rho}{For replicate BRR designs, to specify the parameter for
+ Fay's variance method, giving weights of \code{rho} and \code{2-rho}}
+ \item{return.replicates}{Return the replicates as a component of the
+ result? (for \code{predict}, only possible if they
+ were computed in the \code{svyglm} fit)}
+ \item{object}{A \code{svyglm} object}
+ \item{correlation}{Include the correlation matrix of parameters?}
+ \item{na.action}{Handling of NAs}
+ \item{multicore}{Use the \code{multicore} package to distribute
+ replicates across processors?}
+ \item{df.resid}{Optional denominator degrees of freedom for Wald
+ tests}
+ \item{newdata}{new data frame for prediction}
+ \item{total}{population size when predicting population total}
+ \item{type}{linear predictor (\code{link}) or response}
+ \item{se.fit}{if \code{TRUE}, return variances of predictions}
+ \item{vcov}{if \code{TRUE} and \code{se.fit=TRUE} return the full
+ variance-covariance matrix of predictions}
+}
+\details{
+There is no \code{anova} method for \code{svyglm} as the models are not
+fitted by maximum likelihood. The function \code{\link{regTermTest}} may
+be useful for testing sets of regression terms.
+
+For binomial and Poisson families use \code{family=quasibinomial()}
+and \code{family=quasipoisson()} to avoid a warning about non-integer
+numbers of successes. The `quasi' versions of the family objects give
+the same point estimates and standard errors and do not give the
+warning.
+
+If \code{df.resid} is not specified, the df for the null model is
+computed by \code{\link{degf}} and the residual df computed by
+subtraction. This is recommended by Korn and Graubard and is correct
+for PSU-level covariates but is potentially very conservative for
+individual-level covariates. To get tests based on a Normal distribution
+use \code{df.resid=Inf}; to use the number of PSUs minus the number of
+strata, specify \code{df.resid=degf(design)}.
+
+Parallel processing with \code{multicore=TRUE} is helpful only for
+fairly large data sets and on computers with sufficient memory. It may
+be incompatible with GUIs, although the Mac Aqua GUI appears to be safe.
+
+\code{predict} gives fitted values and sampling variability for specific new
+values of covariates. When \code{newdata} are the population mean it
+gives the regression estimator of the mean, and when \code{newdata} are
+the population totals and \code{total} is specified it gives the
+regression estimator of the population total. Regression estimators of
+mean and total can also be obtained with \code{\link{calibrate}}.
+
+
+}
+
+ \value{ \code{svyglm} returns an object of class \code{svyglm}. The
+ \code{predict} method returns an object of class \code{svystat}}
+
+\author{Thomas Lumley}
+
+
+\seealso{
+ \code{\link{glm}}, which is used to do most of the work.
+
+ \code{\link{regTermTest}}, for multiparameter tests
+
+ \code{\link{calibrate}}, for an alternative way to specify regression
+ estimators of population totals or means
+
+ \code{\link{svyttest}} for one-sample and two-sample t-tests.
+}
+
+\examples{
+
+ data(api)
+
+
+ dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+ dclus2<-svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
+ rstrat<-as.svrepdesign(dstrat)
+ rclus2<-as.svrepdesign(dclus2)
+
+ summary(svyglm(api00~ell+meals+mobility, design=dstrat))
+ summary(svyglm(api00~ell+meals+mobility, design=dclus2))
+ summary(svyglm(api00~ell+meals+mobility, design=rstrat))
+ summary(svyglm(api00~ell+meals+mobility, design=rclus2))
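+ ## illustrative (not in the original examples): Wald tests with
+ ## denominator df equal to number of PSUs minus number of strata
+ summary(svyglm(api00~ell+meals+mobility, design=dclus2), df.resid=degf(dclus2))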
+
+ ## use quasibinomial, quasipoisson to avoid warning messages
+ summary(svyglm(sch.wide~ell+meals+mobility, design=dstrat,
+ family=quasibinomial()))
+
+
+ ## Compare regression and ratio estimation of totals
+ api.ratio <- svyratio(~api.stu,~enroll, design=dstrat)
+ pop<-data.frame(enroll=sum(apipop$enroll, na.rm=TRUE))
+ npop <- nrow(apipop)
+ predict(api.ratio, pop$enroll)
+
+ ## regression estimator is less efficient
+ api.reg <- svyglm(api.stu~enroll, design=dstrat)
+ predict(api.reg, newdata=pop, total=npop)
+ ## same as calibration estimator
+ svytotal(~api.stu, calibrate(dstrat, ~enroll, pop=c(npop, pop$enroll)))
+
+ ## svyglm can also reproduce the ratio estimator
+ api.reg2 <- svyglm(api.stu~enroll-1, design=dstrat,
+ family=quasi(link="identity",var="mu"))
+ predict(api.reg2, newdata=pop, total=npop)
+
+ ## higher efficiency by modelling variance better
+ api.reg3 <- svyglm(api.stu~enroll-1, design=dstrat,
+ family=quasi(link="identity",var="mu^3"))
+ predict(api.reg3, newdata=pop, total=npop)
+ ## true value
+ sum(apipop$api.stu)
+
+ }
+\keyword{regression}% at least one, from doc/KEYWORDS
+\keyword{survey}% at least one, from doc/KEYWORDS
diff --git a/man/svyhist.Rd b/man/svyhist.Rd
new file mode 100644
index 0000000..f02d63e
--- /dev/null
+++ b/man/svyhist.Rd
@@ -0,0 +1,57 @@
+\name{svyhist}
+\alias{svyhist}
+\alias{svyboxplot}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Histograms and boxplots}
+\description{
+Histograms and boxplots weighted by the sampling weights.
+}
+\usage{
+svyhist(formula, design, breaks = "Sturges",
+ include.lowest = TRUE, right = TRUE, xlab = NULL,
+ main = NULL, probability = TRUE, freq = !probability, ...)
+svyboxplot(formula, design, all.outliers=FALSE,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{One-sided formula for \code{svyhist}, two-sided for \code{svyboxplot}}
+ \item{design}{A survey design object}
+ \item{xlab}{x-axis label}
+ \item{main}{Main title}
+ \item{probability,freq}{Y-axis is probability density or frequency}
+ \item{all.outliers}{Show all outliers in the boxplot, not just extremes}
+ \item{breaks, include.lowest, right}{As for \code{\link{hist}}}
+ \item{\dots}{Other arguments to \code{\link{hist}} or \code{\link{bxp}}}
+}
+
+\details{
+The histogram breakpoints are computed as if the sample were a
+simple random sample of the same size.
+
+The grouping variable in \code{svyboxplot}, if present, must be a factor.
+
+The boxplot whiskers go to the maximum and minimum observations or to
+1.5 interquartile ranges beyond the end of the box, whichever is
+closer. The maximum and minimum are plotted as outliers if they are
+beyond the ends of the whiskers, but other outlying points are not
+plotted unless \code{all.outliers=TRUE}. \code{svyboxplot}
+requires a two-sided formula; use \code{variable~1} for a single boxplot.
+ }
+
+\seealso{ \code{\link{svyplot}}}
+\examples{
+data(api)
+dstrat <- svydesign(id = ~1, strata = ~stype, weights = ~pw, data = apistrat,
+ fpc = ~fpc)
+opar<-par(mfrow=c(1,3))
+svyhist(~enroll, dstrat, main="Survey weighted",col="purple",ylim=c(0,1.3e-3))
+hist(apistrat$enroll, main="Sample unweighted",col="purple",prob=TRUE,ylim=c(0,1.3e-3))
+hist(apipop$enroll, main="Population",col="purple",prob=TRUE,ylim=c(0,1.3e-3))
+
+par(mfrow=c(1,1))
+svyboxplot(enroll~stype,dstrat,all.outliers=TRUE)
+svyboxplot(enroll~1,dstrat)
+par(opar)
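+## breakpoints are passed through as in hist() (illustrative,
+## not in the original examples)
+svyhist(~enroll, dstrat, breaks=20, col="purple")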
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{hplot}% __ONLY ONE__ keyword per line
diff --git a/man/svykappa.Rd b/man/svykappa.Rd
new file mode 100644
index 0000000..1e23be8
--- /dev/null
+++ b/man/svykappa.Rd
@@ -0,0 +1,35 @@
+\name{svykappa}
+\alias{svykappa}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Cohen's kappa for agreement}
+\description{
+ Computes the unweighted kappa measure of agreement between two raters
+ and the standard error. The measurements must both be factor variables
+ in the survey design object.
+}
+\usage{
+svykappa(formula, design, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{one-sided formula giving two measurements}
+ \item{design}{survey design object}
+ \item{\dots}{for future expansion }
+}
+\value{
+ Object of class \code{svystat}
+}
+
+\seealso{ \code{\link{svycontrast}}}
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+svykappa(~comp.imp+sch.wide, dclus1)
+
+dclus1<-update(dclus1, stypecopy=stype)
+svykappa(~stype+stypecopy,dclus1)
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/svykm.Rd b/man/svykm.Rd
new file mode 100644
index 0000000..1680ec7
--- /dev/null
+++ b/man/svykm.Rd
@@ -0,0 +1,109 @@
+\name{svykm}
+\alias{svykm}
+\alias{plot.svykm}
+\alias{plot.svykmlist}
+\alias{lines.svykm}
+\alias{quantile.svykm}
+\alias{confint.svykm}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Estimate survival function. }
+\description{
+Estimates the survival function using a weighted Kaplan-Meier
+estimator.
+}
+\usage{
+svykm(formula, design,se=FALSE, ...)
+\method{plot}{svykm}(x,xlab="time",ylab="Proportion surviving",
+ ylim=c(0,1),ci=NULL,lty=1,...)
+\method{lines}{svykm}(x,xlab="time",type="s",ci=FALSE,lty=1,...)
+\method{plot}{svykmlist}(x, pars=NULL, ci=FALSE,...)
+\method{quantile}{svykm}(x, probs=c(0.75,0.5,0.25),ci=FALSE,level=0.95,...)
+\method{confint}{svykm}(object,parm,level=0.95,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{Two-sided formula. The response variable should be a right-censored
+ \code{Surv} object}
+ \item{design}{survey design object}
+ \item{se}{Compute standard errors? This is slow for moderate to large
+ data sets}
+ \item{\dots}{in \code{plot} and \code{lines} methods, graphical
+parameters }
+ \item{x}{a \code{svykm} or \code{svykmlist} object}
+ \item{xlab,ylab,ylim,type}{as for \code{plot}}
+ \item{lty}{Line type, see \code{\link{par}}}
+ \item{ci}{Plot (or return, for \code{quantile}) the confidence interval}
+ \item{pars}{A list of vectors of graphical parameters for the
+ separate curves in a \code{svykmlist} object}
+ \item{object}{A \code{svykm} object}
+ \item{parm}{vector of times to report confidence intervals}
+ \item{level}{confidence level}
+ \item{probs}{survival probabilities for computing survival quantiles
+ (note that these are the complement of the usual
+ \code{\link{quantile}} input, so 0.9 means 90\% surviving, not 90\% dead)}
+}
+\value{
+ For \code{svykm}, an object of class \code{svykm} for a single curve or \code{svykmlist}
+ for multiple curves.
+}
+\details{
+ When standard errors are computed, the survival curve is
+ actually the Aalen (hazard-based) estimator rather than the
+ Kaplan-Meier estimator.
+
+ The standard error computations use memory proportional to the sample
+ size times the square of the number of events. This can be a lot.
+
+ In the case of equal-probability cluster sampling without replacement
+ the computations are essentially the same as those of Williams (1995),
+ and the same linearization strategy is used for other designs.
+
+ Confidence intervals are computed on the log(survival) scale,
+ following the default in the \code{survival} package, which was based
+ on simulations by Link (1984).
+
+ Confidence intervals for quantiles use Woodruff's method: the interval
+ is the intersection of the horizontal line at the specified quantile
+ with the pointwise confidence band around the survival curve.
+}
+\references{
+Link, C. L. (1984). Confidence intervals for the survival function using
+Cox's proportional hazards model with covariates. Biometrics 40,
+601-610.
+
+Williams RL (1995) "Product-Limit Survival Functions with Correlated
+Survival Times" Lifetime Data Analysis 1: 171--186
+
+Woodruff RS (1952) Confidence intervals for medians and other
+position measures. JASA 57, 622-627.
+ }
+ \seealso{\code{\link{predict.svycoxph}} for survival curves from a Cox model
+ }
+\examples{
+data(pbc, package="survival")
+pbc$randomized <- with(pbc, !is.na(trt) & trt>0)
+biasmodel<-glm(randomized~age*edema,data=pbc)
+pbc$randprob<-fitted(biasmodel)
+
+dpbc<-svydesign(id=~1, prob=~randprob, strata=~edema, data=subset(pbc,randomized))
+
+s1<-svykm(Surv(time,status>0)~1, design=dpbc)
+s2<-svykm(Surv(time,status>0)~I(bili>6), design=dpbc)
+
+plot(s1)
+plot(s2)
+plot(s2, lwd=2, pars=list(lty=c(1,2),col=c("purple","forestgreen")))
+
+quantile(s1, probs=c(0.9,0.75,0.5,0.25,0.1))
+
+s3<-svykm(Surv(time,status>0)~I(bili>6), design=dpbc,se=TRUE)
+plot(s3[[2]],col="purple")
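+## pointwise confidence bands (illustrative, not in the original examples)
+plot(s3[[2]], col="purple", ci=TRUE)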
+
+confint(s3[[2]], parm=365*(1:5))
+quantile(s3[[1]], ci=TRUE)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{survival}% __ONLY ONE__ keyword per line
diff --git a/man/svyloglin.Rd b/man/svyloglin.Rd
new file mode 100644
index 0000000..ec28271
--- /dev/null
+++ b/man/svyloglin.Rd
@@ -0,0 +1,91 @@
+\name{svyloglin}
+\alias{svyloglin}
+\alias{anova.svyloglin}
+\alias{update.svyloglin}
+\alias{coef.svyloglin}
+\alias{print.anova.svyloglin}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Loglinear models }
+\description{
+Fit and compare hierarchical loglinear models for complex survey data.
+}
+\usage{
+svyloglin(formula, design, ...)
+\method{update}{svyloglin}(object,formula,...)
+\method{anova}{svyloglin}(object,object1,...,integrate=FALSE)
+\method{print}{anova.svyloglin}(x,pval=c("F","saddlepoint","lincom","chisq"),...)
+\method{coef}{svyloglin}(object,...,intercept=FALSE)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{Model formula}
+ \item{design}{survey design object}
+ \item{object,object1}{loglinear model from \code{svyloglin}}
+ \item{pval}{p-value approximation: see Details}
+ \item{integrate}{Compute the exact asymptotic p-value (slow)?}
+ \item{\dots}{not used }
+ \item{intercept}{Report the intercept?}
+ \item{x}{anova object}
+}
+\details{
+The loglinear model is fitted to a multiway table with probabilities
+estimated by \code{\link{svymean}} and with the sample size equal to the
+observed sample size, treating the resulting table as if it came from iid
+multinomial sampling, as described by Rao and Scott. The
+variance-covariance matrix does not include the intercept term, and so
+by default neither does the \code{coef} method. A Newton-Raphson
+algorithm is used, rather than iterative proportional fitting, so
+starting values are not needed.
+
+The \code{anova} method computes the quantities that would be the score
+(Pearson) and likelihood ratio chi-squared statistics if the data were
+an iid sample. It computes four p-values for each of these, based on the
+exact asymptotic distribution (see \code{\link{pchisqsum}}), a
+saddlepoint approximation to this distribution, a scaled
+chi-squared distribution, and a scaled F-distribution. When testing the
+two-way interaction model against the main-effects model in a two-way
+table the score statistic and p-values match the Rao-Scott tests
+computed by \code{\link{svychisq}}.
+
+The \code{anova} method can only compare two models if they are for
+exactly the same multiway table (same variables and same order). The
+\code{update} method will help with this. It is also much faster to use
+\code{update} than \code{svyloglin} for a large data set: its time
+complexity depends only on the size of the model, not on the size of the
+data set.
+
+ It is not possible to fit a model using a variable created inline, eg
+\code{I(x<10)}, since the multiway table is based on all variables used
+in the formula.
+}
+\value{
+ Object of class \code{"svyloglin"}
+}
+\references{
+Rao, JNK, Scott, AJ (1984) "On Chi-squared Tests For Multiway Contingency Tables with Proportions Estimated From Survey Data" Annals of Statistics 12:46-60.
+}
+
+\seealso{\code{\link{svychisq}}, \code{\link{svyglm}},\code{\link{pchisqsum}}}
+\examples{
+ data(api)
+ dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+ a<-svyloglin(~stype+comp.imp,dclus1)
+ b<-update(a,~.^2)
+ an<-anova(a,b)
+ an
+ print(an, pval="saddlepoint")
+
+ ## Wald test
+ regTermTest(b, ~stype:comp.imp)
+
+ ## linear-by-linear association
+ d<-update(a,~.+as.numeric(stype):as.numeric(comp.imp))
+ an1<-anova(a,d)
+ an1
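+
+ ## inline variables are not allowed; create them in the design with
+ ## update() instead (a sketch, not in the original examples)
+ dclus1<-update(dclus1, mealcat=cut(meals,c(0,25,50,75,100)))
+ svyloglin(~stype+mealcat, dclus1)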
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/svylogrank.Rd b/man/svylogrank.Rd
new file mode 100644
index 0000000..6d18328
--- /dev/null
+++ b/man/svylogrank.Rd
@@ -0,0 +1,80 @@
+\name{svylogrank}
+\alias{svylogrank}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Compare survival distributions
+}
+\description{
+Computes a weighted version of the logrank and stratified logrank tests
+for comparing two or more survival distributions. The generalization to
+complex samples is based on the characterization of the logrank test as
+the score test in a Cox model. Under simple random sampling with
+replacement, this function with \code{rho=0} and \code{gamma=0}
+is almost identical to the robust score test
+in the survival package.
+}
+\usage{
+svylogrank(formula, design, rho=0,gamma=0,method=c("small","large","score"), ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{
+ Model formula with a single predictor and optionally a \code{\link{strata}} term. The predictor must be a factor if it has more than two levels.
+}
+ \item{design}{
+ A survey design object
+}
+\item{rho,gamma}{Coefficients for the Harrington/Fleming G-rho-gamma
+ tests. The default is the logrank test; \code{rho=1} gives a
+ generalised Wilcoxon test}
+\item{method}{\code{"small"} works faster when a matrix with dimension
+ number of events by number of people fits easily in memory;
+ \code{"large"} works faster for large data sets; \code{"score"} works
+by brute-force construction of an expanded data set, and is for debugging}
+ \item{\dots}{
+ for future expansion.
+}
+}
+
+\value{
+A vector containing the z-statistic for comparing each level of the variable to the lowest, the chi-squared statistic for the logrank test, and the p-value.
+}
+
+\references{
+Rader, Kevin Andrew. 2014. Methods for Analyzing Survival and
+Binary Data in Complex Surveys. Doctoral dissertation, Harvard
+University. \url{http://nrs.harvard.edu/urn-3:HUL.InstRepos:12274283}
+}
+
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+\code{\link{svykm}}, \code{\link{svycoxph}}.
+}
+\examples{
+library("survival")
+data(nwtco)
+## stratified on case status
+dcchs<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
+ subset=~I(in.subcohort | rel), data=nwtco, method="simple")
+svylogrank(Surv(edrel,rel)~factor(stage),design=dcchs)
+
+data(pbc, package="survival")
+pbc$randomized <- with(pbc, !is.na(trt) & trt>0)
+biasmodel<-glm(randomized~age*edema,data=pbc)
+pbc$randprob<-fitted(biasmodel)
+dpbc<-svydesign(id=~1, prob=~randprob, strata=~edema, data=subset(pbc,randomized))
+
+svylogrank(Surv(time,status==2)~trt,design=dpbc)
+
+svylogrank(Surv(time,status==2)~trt,design=dpbc,rho=1)
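+
+## stratified logrank: a strata() term in the formula (a sketch,
+## not in the original examples)
+svylogrank(Surv(time,status==2)~trt+strata(edema), design=dpbc)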
+
+rpbc<-as.svrepdesign(dpbc)
+svylogrank(Surv(time,status==2)~trt,design=rpbc)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{survival}% __ONLY ONE__ keyword per line
diff --git a/man/svymle.Rd b/man/svymle.Rd
new file mode 100755
index 0000000..e8379e6
--- /dev/null
+++ b/man/svymle.Rd
@@ -0,0 +1,150 @@
+\name{svymle}
+\alias{svymle}
+\alias{print.svymle}
+\alias{coef.svymle}
+\alias{summary.svymle}
+\alias{vcov.svymle}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Maximum pseudolikelihood estimation in complex surveys}
+\description{
+Fits a user-specified likelihood parametrised by multiple linear
+predictors to data from a complex sample survey and computes the
+sandwich variance estimator of the coefficients. Note that this function
+maximises an estimated population likelihood; it is not the sample MLE.
+}
+\usage{
+svymle(loglike, gradient = NULL, design, formulas, start = NULL, control
+= list(maxit=1000), na.action="na.fail", method=NULL, ...)
+\method{summary}{svymle}(object, stderr=c("robust", "model"),...)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{loglike}{vectorised loglikelihood function}
+ \item{gradient}{Derivative of \code{loglike}. Required for variance computation and helpful for fitting}
+ \item{design}{ a \code{survey.design} object }
+ \item{formulas}{A list of formulas specifying the variable and linear predictors: see Details below}
+ \item{start}{Starting values for parameters}
+ \item{control}{control options for \code{\link{optim}}}
+ \item{na.action}{Handling of \code{NA}s}
+ \item{method}{\code{"nlm"} to use \code{nlm}, otherwise passed to \code{\link{optim}}}
+ \item{\dots}{Arguments to \code{loglike} and \code{gradient} that are
+ not to be optimised over.}
+ \item{object}{\code{svymle} object}
+ \item{stderr}{Choice of standard error estimator. The default is a
+ standard sandwich estimator. See Details below.}
+}
+\details{
+Optimization is done by \code{\link{nlm}} by default or if
+\code{method=="nlm"}. Otherwise \code{\link{optim}} is used and \code{method}
+specifies the method and \code{control} specifies control parameters.
+
+ The \code{design} object contains all the data and design information
+from the survey, so all the formulas refer to variables in this object.
+The \code{formulas} argument needs to specify the response variable and
+a linear predictor for each freely varying argument of \code{loglike}.
+
+Consider for example the \code{\link{dnorm}} function, with arguments
+\code{x}, \code{mean}, \code{sd} and \code{log}, and suppose we want to
+estimate the mean of \code{y} as a linear function of a variable
+\code{z}, and to estimate a constant standard deviation. The \code{log}
+argument must be fixed at \code{FALSE} to get the loglikelihood. A
+\code{formulas} argument would be \code{list(~y, mean=~z, sd=~1)}. Note
+that the data variable \code{y} must be the first argument to
+\code{dnorm} and the first formula and that all the other formulas are
+labelled. It is also permitted to have the data variable as the
+left-hand side of one of the formulas: eg \code{list( mean=y~z, sd=~1)}.
+
+
+The usual variance estimator for MLEs in a survey sample is a `sandwich'
+variance that requires the score vector and the information matrix. It
+requires only sampling assumptions to be valid (though some model
+assumptions are required for it to be useful). This is the
+\code{stderr="robust"} option, which is available only when the \code{gradient}
+argument was specified.
+
+If the model is correctly specified and the sampling is at random
+conditional on variables in the model then standard errors based on just
+the information matrix will be approximately valid. In particular, for
+independent sampling where weights and strata depend on variables in the
+model, the \code{stderr="model"} option should work fairly well.
+}
+\value{
+ An object of class \code{svymle}
+}
+\author{Thomas Lumley}
+
+\seealso{\code{\link{svydesign}}, \code{\link{svyglm}}}
+
+\examples{
+
+ data(api)
+
+ dstrat<-svydesign(id=~1, strata=~stype, weight=~pw, fpc=~fpc, data=apistrat)
+
+ ## fit with glm
+ m0 <- svyglm(api00~api99+ell,family="gaussian",design=dstrat)
+ ## fit as mle (without gradient)
+ m1 <- svymle(loglike=dnorm,gradient=NULL, design=dstrat,
+ formulas=list(mean=api00~api99+ell, sd=~1),
+ start=list(c(80,1,0),c(20)), log=TRUE)
+ ## with gradient
+ gr<- function(x,mean,sd,log){
+ dm<-2*(x - mean)/(2*sd^2)
+ ds<-(x-mean)^2*(2*(2 * sd))/(2*sd^2)^2 - sqrt(2*pi)/(sd*sqrt(2*pi))
+ cbind(dm,ds)
+ }
+ m2 <- svymle(loglike=dnorm,gradient=gr, design=dstrat,
+ formulas=list(mean=api00~api99+ell, sd=~1),
+ start=list(c(80,1,0),c(20)), log=TRUE, method="BFGS")
+
+ summary(m0)
+ summary(m1,stderr="model")
+ summary(m2)
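+
+ ## the pseudolikelihood point estimates should be close to the glm fit
+ ## (illustrative, not in the original examples)
+ coef(m0)
+ coef(m1)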
+
+ ## More complicated censored data example
+ ## showing that the response variable can be multivariate
+
+ data(pbc, package="survival")
+ pbc$randomized <- with(pbc, !is.na(trt) & trt>0)
+ biasmodel<-glm(randomized~age*edema,data=pbc)
+ pbc$randprob<-fitted(biasmodel)
+ dpbc<-svydesign(id=~1, prob=~randprob, strata=~edema,
+ data=subset(pbc,randomized))
+
+ lcens<-function(x,mean,sd){
+ ifelse(x[,2]==1,
+ dnorm(log(x[,1]),mean,sd,log=TRUE),
+ pnorm(log(x[,1]),mean,sd,log=TRUE,lower.tail=FALSE)
+ )
+ }
+
+ gcens<- function(x,mean,sd){
+
+ dz<- -dnorm(log(x[,1]),mean,sd)/pnorm(log(x[,1]),mean,sd,lower.tail=FALSE)
+
+ dm<-ifelse(x[,2]==1,
+ 2*(log(x[,1]) - mean)/(2*sd^2),
+ dz*-1/sd)
+ ds<-ifelse(x[,2]==1,
+ (log(x[,1])-mean)^2*(2*(2 * sd))/(2*sd^2)^2 - sqrt(2*pi)/(sd*sqrt(2*pi)),
+ dz*-(log(x[,1])-mean)/(sd*sd))
+ cbind(dm,ds)
+ }
+
+if(!is.null(pbc$albumin)){
+ svymle(loglike=lcens, gradient=gcens, design=dpbc,
+ formulas=list(mean=I(cbind(time,status>0))~bili+protime+albumin,
+ sd=~1),
+ start=list(c(10,0,0,0),c(1)))
+} else {
+ svymle(loglike=lcens, gradient=gcens, design=dpbc,
+ formulas=list(mean=I(cbind(time,status>0))~bili+protime+alb,
+ sd=~1),
+ start=list(c(10,0,0,0),c(1)))
+}
+
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{models}% __ONLY ONE__ keyword per line
+\keyword{optimize}% __ONLY ONE__ keyword per line
diff --git a/man/svyolr.Rd b/man/svyolr.Rd
new file mode 100644
index 0000000..e488e5a
--- /dev/null
+++ b/man/svyolr.Rd
@@ -0,0 +1,53 @@
+\name{svyolr}
+\alias{svyolr}
+\alias{svyolr.survey.design2}
+\alias{svyolr.svyrep.design}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Proportional odds and related models }
+\description{
+ Fits cumulative link models: proportional odds, probit, complementary
+ log-log, and cauchit.
+}
+\usage{
+svyolr(formula, design, ...)
+\method{svyolr}{survey.design2}(formula, design, start, ..., na.action = na.omit, method = c("logistic",
+ "probit", "cloglog", "cauchit"))
+\method{svyolr}{svyrep.design}(formula,design,...,return.replicates=FALSE,
+ multicore=getOption("survey.multicore"))
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{Formula: the response must be a factor with at least
+ three levels}
+ \item{design}{survey design object }
+ \item{\dots}{other arguments to the methods}
+ \item{start}{Optional starting values for optimization}
+ \item{na.action}{handling of missing values}
+ \item{multicore}{Use \code{multicore} package to distribute computation of replicates across multiple
+ processors?}
+ \item{method}{Link function}
+ \item{return.replicates}{return the individual replicate-weight estimates}
+}
+\value{
+ An object of class \code{svyolr}
+}
+
+\author{The code is based closely on polr() from the MASS package of
+ Venables and Ripley.}
+
+\seealso{\code{\link{svyglm}}, \code{\link{regTermTest}} }
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+dclus1<-update(dclus1, mealcat=cut(meals,c(0,25,50,75,100)))
+
+m<-svyolr(mealcat~avg.ed+mobility+stype, design=dclus1)
+m
+
+## Use regTermTest for testing multiple parameters
+regTermTest(m, ~avg.ed+stype, method="LRT")
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/svyplot.Rd b/man/svyplot.Rd
new file mode 100755
index 0000000..63ccaac
--- /dev/null
+++ b/man/svyplot.Rd
@@ -0,0 +1,102 @@
+\name{svyplot}
+\alias{svyplot}
+\alias{svyplot.default}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Plots for survey data }
+\description{
+ Because observations in survey samples may represent very different
+ numbers of units in the population, ordinary plots can be misleading.
+ The \code{svyplot} function produces scatterplots adjusted in various ways
+ for sampling weights.
+}
+\usage{
+svyplot(formula, design,...)
+\method{svyplot}{default}(formula, design, style = c("bubble", "hex", "grayhex","subsample","transparent"),
+sample.size = 500, subset = NULL, legend = 1, inches = 0.05,
+amount=NULL, basecol="black",
+alpha=c(0, 0.8),xbins=30,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{A model formula}
+ \item{design}{ A survey object (svydesign or svrepdesign)}
+ \item{style}{See Details below}
+ \item{sample.size}{For \code{style="subsample"}}
+ \item{subset}{expression using variables in the design object}
+ \item{legend}{For \code{style="hex"} or \code{"grayhex"}}
+ \item{inches}{Scale for bubble plots}
+ \item{amount}{list with \code{x} and \code{y} components for amount of
+ jittering to use in subsample plots, or \code{NULL} for the default
+ amount}
+ \item{basecol}{base color for transparent plots, or a function to
+ compute the color (see below), or color for bubble plots}
+ \item{alpha}{minimum and maximum opacity for transparent plots}
+\item{xbins}{Number of (x-axis) bins for hexagonal binning}
+ \item{\dots}{Passed to \code{plot} methods}
+}
+\details{
+ Bubble plots are scatterplots with circles whose area is proportional
+ to the sampling weight. The two "hex" styles produce hexagonal
+ binning scatterplots, and require the \code{hexbin} package from
+ Bioconductor. The "transparent" style plots points with opacity
+ proportional to sampling weight.
+
+
+ The \code{subsample} method uses the sampling weights to create a
+ sample from approximately the population distribution and passes this to \code{\link{plot}}.
+
+ Bubble plots are suited to small surveys, hexagonal binning and
+ transparency to large surveys where plotting all the points would
+ result in too much overlap.
+
+ \code{basecol} can be a function taking one data frame argument, which
+ will be passed the data frame of variables from the survey object.
+ This could be memory-intensive for large data sets.
+}
+\value{
+ None
+}
+
+\seealso{
+ \code{\link{symbols}} for other options (such as colour) for bubble
+ plots.
+
+ \code{\link{svytable}} for plots of discrete data.
+}
+\examples{
+data(api)
+dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+
+svyplot(api00~api99, design=dstrat, style="bubble")
+svyplot(api00~api99, design=dstrat, style="transparent",pch=19)
+
+## these two require the hexbin package
+svyplot(api00~api99, design=dstrat, style="hex", xlab="1999 API",ylab="2000 API")
+svyplot(api00~api99, design=dstrat, style="grayhex",legend=0)
+
+
+dclus2<-svydesign(id=~dnum+snum, weights=~pw,
+ data=apiclus2, fpc=~fpc1+fpc2)
+svyplot(api00~api99, design=dclus2, style="subsample")
+svyplot(api00~api99, design=dclus2, style="subsample",
+ amount=list(x=25,y=25))
+
+svyplot(api00~api99, design=dstrat,
+ basecol=function(df){c("goldenrod","tomato","sienna")[as.numeric(df$stype)]},
+ style="transparent",pch=19,alpha=c(0,1))
+legend("topleft",col=c("goldenrod","tomato","sienna"), pch=19, legend=c("E","H","M"))
+
+## For discrete data, estimate a population table and plot the table.
+plot(svytable(~sch.wide+comp.imp+stype,design=dstrat))
+fourfoldplot(svytable(~sch.wide+comp.imp+stype,design=dstrat,round=TRUE))
+
+
+## To draw on a hexbin plot you need grid graphics, eg,
+library(grid)
+h<-svyplot(api00~api99, design=dstrat, style="hex", xlab="1999 API",ylab="2000 API")
+s<-svysmooth(api00~api99,design=dstrat)
+grid.polyline(s$api99$x,s$api99$y,vp=h$plot.vp@hexVp.on,default.units="native",
+ gp=gpar(col="red",lwd=2))
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{hplot}% __ONLY ONE__ keyword per line
diff --git a/man/svyprcomp.Rd b/man/svyprcomp.Rd
new file mode 100644
index 0000000..2b50c5f
--- /dev/null
+++ b/man/svyprcomp.Rd
@@ -0,0 +1,91 @@
+\name{svyprcomp}
+\Rdversion{1.1}
+\alias{svyprcomp}
+\alias{biplot.svyprcomp}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Sampling-weighted principal component analysis
+}
+\description{
+Computes principal components using the sampling weights.
+}
+\usage{
+svyprcomp(formula, design, center = TRUE, scale. = FALSE, tol = NULL, scores = FALSE, ...)
+\method{biplot}{svyprcomp}(x, cols=c("black","darkred"),xlabs=NULL,
+ weight=c("transparent","scaled","none"),
+ max.alpha=0.5,max.cex=0.5,xlim=NULL,ylim=NULL,pc.biplot=FALSE,
+ expand=1,xlab=NULL,ylab=NULL, arrow.len=0.1, ...)
+}
+\arguments{
+ \item{formula}{
+model formula describing variables to be used
+}
+ \item{design}{
+survey design object.
+}
+ \item{center}{
+Center data before analysis?
+}
+ \item{scale.}{
+Scale to unit variance before analysis?
+}
+ \item{tol}{
+Tolerance for omitting components from the results; a proportion of the standard deviation of the first component. The default is to keep all components.
+}
+ \item{scores}{
+Return scores on each component? These are needed for \code{biplot}.
+}
+\item{x}{
+A \code{svyprcomp} object
+}
+\item{cols}{
+Base colors for observations and variables respectively
+}
+\item{xlabs}{
+ Formula, or character vector, giving labels for each observation
+}
+\item{weight}{
+ How to display the sampling weights: \code{"scaled"} changes the size of the point label, \code{"transparent"} uses opacity proportional to sampling weight, \code{"none"} changes neither.
+}
+\item{max.alpha}{
+ Opacity for the largest sampling weight, or for all points if \code{weight!="transparent"}
+}
+\item{max.cex}{
+ Character size (as a multiple of \code{par("cex")}) for the largest sampling weight, or for all points if \code{weight!="scaled"}
+}
+\item{xlim,ylim,xlab,ylab}{Graphical parameters}
+\item{expand,arrow.len}{See \code{\link{biplot}}}
+\item{pc.biplot}{See \code{\link{biplot.prcomp}}}
+ \item{\dots}{
+Other arguments to \code{\link{prcomp}}, or graphical parameters for \code{biplot}
+}
+
+}
+
+\value{
+\code{svyprcomp} returns an object of class \code{svyprcomp}, similar to
+class \code{prcomp} but including design information
+
+}
+
+\seealso{
+\code{\link{prcomp}}, \code{\link{biplot.prcomp}}
+}
+\examples{
+data(api)
+dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)
+
+pc <- svyprcomp(~api99+api00+ell+hsg+meals+emer, design=dclus2,scale=TRUE,scores=TRUE)
+pc
+biplot(pc, xlabs=~dnum, weight="none")
+
+biplot(pc, xlabs=~dnum,max.alpha=1)
+
+biplot(pc, weight="scaled",max.cex=1.5, xlabs=~dnum)
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{hplot}% __ONLY ONE__ keyword per line
+\keyword{multivariate}
\ No newline at end of file
diff --git a/man/svypredmeans.Rd b/man/svypredmeans.Rd
new file mode 100644
index 0000000..9ca19d1
--- /dev/null
+++ b/man/svypredmeans.Rd
@@ -0,0 +1,57 @@
+\name{svypredmeans}
+\alias{svypredmeans}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Predictive marginal means
+}
+\description{
+Predictive marginal means for a generalised linear model, using the method of Korn and Graubard (1999) and matching the results of SUDAAN. The predictive marginal mean for one level of a factor is the probability-weighted average of the fitted values for the model on new data where all the observations are set to that level of the factor but have whatever values of adjustment variables they really have.
+}
+\usage{
+svypredmeans(adjustmodel, groupfactor)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{adjustmodel}{
+A generalised linear model fit by \code{\link{svyglm}} with the adjustment variable but without the factor for which predictive means are wanted
+}
+ \item{groupfactor}{
+A one-sided formula specifying the factor for which predictive means are wanted. Can use, eg, \code{~interaction(race,sex)} for combining variables}
+
+}
+
+\value{
+An object of class \code{svystat} with the predictive marginal means and their covariance matrix.
+}
+\references{
+Graubard B, Korn E (1999) "Predictive Margins with Survey Data" Biometrics 55:652-659
+
+Bieler, Brown, Williams, & Brogan (2010) "Estimating Model-Adjusted Risks, Risk Differences, and Risk Ratios From Complex Survey Data" Am J Epi DOI: 10.1093/aje/kwp440}
+\note{
+It is possible to supply an adjustment model with only an intercept, but the results are then the same as \code{\link{svymean}}.
+
+It makes no sense to have a variable in the adjustment model that is also part of the grouping factor; doing so will give an error message or \code{NA}.
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+\code{\link{svyglm}}
+
+Worked example using National Health Interview Survey data: \url{https://gist.github.com/tslumley/2e74cd0ac12a671d2724}
+}
+\examples{
+data(nhanes)
+nhanes_design <- svydesign(id=~SDMVPSU, strata=~SDMVSTRA, weights=~WTMEC2YR, nest=TRUE,data=nhanes)
+agesexmodel<-svyglm(HI_CHOL~agecat+RIAGENDR, design=nhanes_design,family=quasibinomial)
+## high cholesterol by race/ethnicity, adjusted for demographic differences
+means<-svypredmeans(agesexmodel, ~race)
+means
+## relative risks compared to non-Hispanic white
+svycontrast(means,quote(`1`/`2`))
+svycontrast(means,quote(`3`/`2`))
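+
+## as noted above, an intercept-only adjustment model reproduces svymean
+## (a sketch, not in the original examples)
+svypredmeans(svyglm(HI_CHOL~1, design=nhanes_design, family=quasibinomial), ~race)
+svyby(~HI_CHOL, ~race, nhanes_design, svymean, na.rm=TRUE)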
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{regression}% __ONLY ONE__ keyword per line
diff --git a/man/svyquantile.Rd b/man/svyquantile.Rd
new file mode 100755
index 0000000..a89f32a
--- /dev/null
+++ b/man/svyquantile.Rd
@@ -0,0 +1,168 @@
+\name{svyquantile}
+\alias{svyquantile}
+\alias{print.svyquantile}
+\alias{SE.svyquantile}
+\alias{svyquantile.survey.design}
+\alias{svyquantile.svyrep.design}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Quantiles for sample surveys}
+\description{
+Compute quantiles for data from complex surveys.
+}
+\usage{
+\method{svyquantile}{survey.design}(x, design, quantiles, alpha=0.05,
+ ci=FALSE, method = "linear", f = 1,
+ interval.type=c("Wald","score","betaWald"), na.rm=FALSE,se=ci,
+ ties=c("discrete","rounded"), df=Inf,...)
+\method{svyquantile}{svyrep.design}(x, design, quantiles,
+ method ="linear", interval.type=c("probability","quantile"), f = 1,
+ return.replicates=FALSE, ties=c("discrete","rounded"),na.rm=FALSE,...)
+\method{SE}{svyquantile}(object,...)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{x}{A formula, vector or matrix}
+ \item{design}{\code{survey.design} or \code{svyrep.design} object}
+ \item{quantiles}{Quantiles to estimate}
+ \item{method}{see \code{\link{approxfun}}}
+ \item{f}{see \code{\link{approxfun}}}
+ \item{ci}{Compute a confidence interval? (relatively slow; needed for \code{\link{svyby}})}
+ \item{se}{Compute standard errors from the confidence interval length?}
+ \item{alpha}{Level for confidence interval}
+ \item{interval.type}{See Details below}
+ \item{ties}{See Details below}
+ \item{df}{Degrees of freedom for a t-distribution. \code{Inf} requests a Normal distribution,
+ \code{NULL} uses \code{\link{degf}}. Not relevant for \code{type="betaWald"}}
+ \item{return.replicates}{Return the replicate means?}
+ \item{na.rm}{Remove \code{NA}s?}
+ \item{...}{arguments for future expansion}
+ \item{object}{Object returned by \code{svyquantile.survey.design}}
+}
+\details{
+ The definition of the CDF and thus of the quantiles is ambiguous in
+ the presence of ties. With \code{ties="discrete"} the data are
+ treated as genuinely discrete, so the CDF has vertical steps at tied
+ observations. With \code{ties="rounded"} all the weights for tied
+ observations are summed and the CDF interpolates linearly between
+ distinct observed values, and so is a continuous function. Combining
+ \code{interval.type="betaWald"} and \code{ties="discrete"} is (close
+ to) the proposal of Shah and Vaish (2006) used in some versions of SUDAAN.
+
+ Interval estimation for quantiles is complicated, because the
+ influence function is not continuous. Linearisation cannot be used
+ directly, and computing the variance of replicates is valid only for
+ some designs (eg BRR, but not jackknife). The \code{interval.type}
+ option controls how the intervals are computed.
+
+ For \code{survey.design} objects the default is
+ \code{interval.type="Wald"}. A 95\% Wald confidence interval is
+ constructed for the proportion below the estimated quantile. The
+ inverse of the estimated CDF is used to map this to a confidence
+ interval for the quantile. This is the method of Woodruff
+ (1952). For \code{"betaWald"} the same procedure is used, but the
+ confidence interval for the proportion is computed using the exact
+ binomial cdf with an effective sample size proposed by Korn &
+ Graubard (1998).
+
+
+ If \code{interval.type="score"} we use a method described by Binder
+ (1991) and due originally to Francisco and Fuller (1986), which
+ corresponds to inverting a robust score test. At the upper and lower
+ limits of the confidence interval, a test of the null hypothesis that
+ the cumulative distribution function is equal to the target quantile
+ just rejects. This was the default before version 2.9. It is much
+ slower than \code{"Wald"}, and Dorfman & Valliant (1993) suggest it is
+ not any more accurate.
+
+ Standard errors are computed from these confidence intervals by
+ dividing the confidence interval length by \code{2*qnorm(1-alpha/2)}.
+
+ For replicate-weight designs, ordinary replication-based standard errors
+ are valid for BRR and Fay's method, and for some bootstrap-based
+ designs, but not for jackknife-based designs.
+ \code{interval.type="quantile"} gives these replication-based
+ standard errors. The default, \code{interval.type="probability"},
+ computes confidence intervals on the probability scale and then
+ transforms back to quantiles, the equivalent of \code{interval.type="Wald"} for
+ \code{survey.design} objects (with \code{alpha=0.05}).
+
+ There is a \code{confint} method for \code{svyquantile} objects; it
+ simply extracts the pre-computed confidence interval.
+
+}
+\value{
+ returns a list whose first component is the quantiles and second
+ component is the confidence intervals. For replicate weight designs,
+ returns an object of class \code{svyrepstat}.
+}
+
+
+\author{Thomas Lumley}
+
+
+\seealso{
+ \code{\link{svykm}} for quantiles of survival curves
+
+ \code{\link{svyciprop}} for confidence intervals on proportions.
+}
+\references{
+ Binder DA (1991) Use of estimating functions for interval estimation
+ from complex surveys. \emph{Proceedings of the ASA Survey Research
+ Methods Section} 1991: 34-42
+
+ Dorfman A, Valliant R (1993) Quantile variance estimators in complex
+ surveys. Proceedings of the ASA Survey Research Methods Section. 1993: 866-871
+
+ Korn EL, Graubard BI. (1998) Confidence Intervals For Proportions With
+ Small Expected Number of Positive Counts Estimated From Survey
+ Data. Survey Methodology 23:193-201.
+
+ Francisco CA, Fuller WA (1986) Estimation of the distribution
+ function with a complex survey. Technical Report, Iowa State
+ University.
+
+ Shao J, Tu D (1995) \emph{The Jackknife and Bootstrap}. Springer.
+
+ Shah BV, Vaish AK (2006) Confidence Intervals for Quantile Estimation
+ from Complex Survey Data. Proceedings of the Section on Survey
+ Research Methods.
+
+ Woodruff RS (1952) Confidence intervals for medians and other
+ position measures. JASA 57, 622-627.
+}
+\examples{
+
+ data(api)
+ ## population
+ quantile(apipop$api00,c(.25,.5,.75))
+
+ ## one-stage cluster sample
+ dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+ svyquantile(~api00, dclus1, c(.25,.5,.75),ci=TRUE)
+ svyquantile(~api00, dclus1, c(.25,.5,.75),ci=TRUE,interval.type="betaWald")
+ svyquantile(~api00, dclus1, c(.25,.5,.75),ci=TRUE,df=NULL)
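+ ## handling of ties: interpolated CDF rather than discrete steps
+ ## (illustrative, not in the original examples)
+ svyquantile(~api00, dclus1, 0.5, ci=TRUE, ties="rounded")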
+
+ dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+ (qapi<-svyquantile(~api00, dclus1, c(.25,.5,.75),ci=TRUE, interval.type="score"))
+ SE(qapi)
+
+ #stratified sample
+ dstrat<-svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+ svyquantile(~api00, dstrat, c(.25,.5,.75),ci=TRUE)
+
+ #stratified sample, replicate weights
+ # interval="probability" is necessary for jackknife weights
+ rstrat<-as.svrepdesign(dstrat)
+ svyquantile(~api00, rstrat, c(.25,.5,.75), interval.type="probability")
+
+
+ # BRR method
+ data(scd)
+ repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
+ c(0,1,0,1,1,0))
+ scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)
+ svyquantile(~arrests+alive, design=scdrep, quantile=0.5, interval.type="quantile")
+
+ }
+\keyword{univar}% at least one, from doc/KEYWORDS
+\keyword{survey}% __ONLY ONE__ keyword per line
diff --git a/man/svyranktest.Rd b/man/svyranktest.Rd
new file mode 100644
index 0000000..91151ed
--- /dev/null
+++ b/man/svyranktest.Rd
@@ -0,0 +1,75 @@
+\name{svyranktest}
+\alias{svyranktest}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Design-based rank tests
+}
+\description{
+Design-based versions of k-sample rank tests. The built-in tests are
+all for location hypotheses, but the user could specify others.
+}
+\usage{
+svyranktest(formula, design,
+ test = c("wilcoxon", "vanderWaerden", "median","KruskalWallis"), ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{
+Model formula \code{y~g} for outcome variable \code{y} and group \code{g}
+}
+ \item{design}{
+A survey design object
+}
+ \item{test}{
+ Which rank test to use: Wilcoxon, van der Waerden's normal-scores
+ test, Mood's test for the median, or a function \code{f(r,N)} where
+ \code{r} is the rank and \code{N} the estimated population
+ size. "KruskalWallis" is a synonym for "wilcoxon" for more than two groups.
+}
+ \item{\dots}{ for future expansion}
+
+}
+
+\value{
+Object of class \code{htest}
+}
+\references{
+Lumley, T., & Scott, A. J. (2013). Two-sample rank tests under complex sampling. Biometrika 100(4), 831-842.
+
+}
+
+\seealso{
+\code{\link{svyttest}}, \code{\link{svylogrank}}
+}
+\examples{
+
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, fpc=~fpc, data=apiclus1)
+
+svyranktest(ell~comp.imp, dclus1)
+svyranktest(ell~comp.imp, dclus1, test="median")
+
+
+svyranktest(ell~stype, dclus1)
+svyranktest(ell~stype, dclus1, test="median")
+
+
+
+## upper quartile
+svyranktest(ell~comp.imp, dclus1, test=function(r,N) as.numeric(r>0.75*N))
+
+
+quantiletest<-function(p){
+ rval<-function(r,N) as.numeric(r>(N*p))
+ attr(rval,"name")<-paste(p,"quantile")
+ rval
+ }
+svyranktest(ell~comp.imp, dclus1, test=quantiletest(0.5))
+svyranktest(ell~comp.imp, dclus1, test=quantiletest(0.75))
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{htest}% __ONLY ONE__ keyword per line
diff --git a/man/svyratio.Rd b/man/svyratio.Rd
new file mode 100755
index 0000000..b2be77d
--- /dev/null
+++ b/man/svyratio.Rd
@@ -0,0 +1,131 @@
+\name{svyratio}
+\alias{svyratio}
+\alias{print.svyratio}
+\alias{print.svyratio_separate}
+\alias{svyratio.svyrep.design}
+\alias{svyratio.survey.design}
+\alias{svyratio.survey.design2}
+\alias{svyratio.twophase}
+\alias{coef.svyratio}
+\alias{SE.svyratio}
+\alias{predict.svyratio}
+\alias{predict.svyratio_separate}
+\alias{confint.svyratio}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Ratio estimation}
+\description{
+Ratio estimation and estimates of totals based on ratios for complex
+survey samples. Estimating domain (subpopulation) means can be done more easily with \code{\link{svymean}}.
+}
+\usage{
+\method{svyratio}{survey.design2}(numerator=formula, denominator,
+ design,separate=FALSE, na.rm=FALSE,formula, covmat=FALSE,deff=FALSE,...)
+\method{svyratio}{svyrep.design}(numerator=formula, denominator, design,
+ na.rm=FALSE,formula, covmat=FALSE,return.replicates=FALSE,deff=FALSE, ...)
+\method{svyratio}{twophase}(numerator=formula, denominator, design,
+ separate=FALSE, na.rm=FALSE,formula,...)
+\method{predict}{svyratio}(object, total, se=TRUE,...)
+\method{predict}{svyratio_separate}(object, total, se=TRUE,...)
+\method{SE}{svyratio}(object,...,drop=TRUE)
+\method{coef}{svyratio}(object,...,drop=TRUE)
+\method{confint}{svyratio}(object, parm, level = 0.95,df =Inf,...)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{numerator,formula}{formula, expression, or data frame giving numerator variable(s)}
+ \item{denominator}{formula, expression, or data frame giving
+ denominator variable(s) }
+ \item{design}{survey design object}
+ \item{object}{result of \code{svyratio}}
+ \item{total}{vector of population totals for the denominator variables in
+ \code{object}, or list of vectors of
+ population stratum totals if \code{separate=TRUE}}
+ \item{se}{Return standard errors?}
+ \item{separate}{Estimate ratio separately for strata}
+ \item{na.rm}{Remove missing values?}
+ \item{covmat}{Compute the full variance-covariance matrix of the
+ ratios}
+\item{deff}{Compute design effects}
+ \item{return.replicates}{Return replicate estimates of ratios}
+ \item{drop}{Return a vector rather than a matrix}
+ \item{parm}{a specification of which parameters are to be given
+ confidence intervals, either a vector of numbers or a vector of
+ names. If missing, all parameters are considered.}
+ \item{level}{the confidence level required.}
+ \item{df}{degrees of freedom for t-distribution in confidence
+ interval, use \code{degf(design)} for number of PSUs minus number of
+ strata}
+ \item{...}{Other unused arguments for other methods}
+}
+\details{
+ The separate ratio estimate of a total is the sum of ratio estimates
+ in each stratum. If the stratum totals supplied in the \code{total}
+ argument and the strata in the design object both have names these
+ names will be matched. If they do not have names it is important that
+ the stratum totals are supplied in the correct order, the same order
+ as shown in the output of \code{summary(design)}.
+
+ When \code{design} is a two-phase design, stratification will be on
+ the second phase.
+}
+\value{
+\code{svyratio} returns an object of class \code{svyratio}. The
+\code{predict} method returns a matrix of population totals and
+optionally a matrix of standard errors.
+}
+\references{Levy and Lemeshow. "Sampling of Populations" (3rd edition). Wiley}
+\author{Thomas Lumley}
+
+\seealso{\code{\link{svydesign}}
+
+ \code{\link{svymean}} for estimating proportions and domain means
+
+ \code{\link{calibrate}} for estimators related to the separate ratio estimator.
+}
+
+\examples{
+data(scd)
+
+## survey design objects
+scddes<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
+nest=TRUE, fpc=rep(5,6))
+scdnofpc<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
+nest=TRUE)
+
+# convert to BRR replicate weights
+scd2brr <- as.svrepdesign(scdnofpc, type="BRR")
+
+# use BRR replicate weights from Levy and Lemeshow
+repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
+c(0,1,0,1,1,0))
+scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)
+
+# ratio estimates
+svyratio(~alive, ~arrests, design=scddes)
+svyratio(~alive, ~arrests, design=scdnofpc)
+svyratio(~alive, ~arrests, design=scd2brr)
+svyratio(~alive, ~arrests, design=scdrep)
+
+
+data(api)
+dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+
+## domain means are ratio estimates, but available directly
+svyratio(~I(api.stu*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dstrat)
+svymean(~api.stu, subset(dstrat, comp.imp=="Yes"))
+
+## separate and combined ratio estimates of total
+(sep<-svyratio(~api.stu,~enroll, dstrat,separate=TRUE))
+(com<-svyratio(~api.stu, ~enroll, dstrat))
+
+stratum.totals<-list(E=1877350, H=1013824, M=920298)
+
+predict(sep, total=stratum.totals)
+predict(com, total=sum(unlist(stratum.totals)))
+
+SE(com)
+coef(com)
+coef(com, drop=FALSE)
+confint(com)
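+## t-based interval using the design degrees of freedom (illustrative,
+## not in the original examples)
+confint(com, df=degf(dstrat))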
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+
diff --git a/man/svyrecvar.Rd b/man/svyrecvar.Rd
new file mode 100755
index 0000000..ac1e9a9
--- /dev/null
+++ b/man/svyrecvar.Rd
@@ -0,0 +1,125 @@
+\name{svyrecvar}
+\alias{svyrecvar}
+\alias{multistage}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Variance estimation for multistage surveys}
+\description{
+ Compute the variance of a total under multistage sampling, using a
+ recursive descent algorithm.
+}
+\usage{
+svyrecvar(x, clusters, stratas,fpcs, postStrata = NULL,
+lonely.psu = getOption("survey.lonely.psu"),
+one.stage=getOption("survey.ultimate.cluster"))
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{x}{Matrix of data or estimating functions}
+ \item{clusters}{Data frame or matrix with cluster ids for each stage}
+ \item{stratas}{Strata for each stage }
+ \item{fpcs}{Information on population and sample size for each stage,
+ created by \code{\link{as.fpc}}}
+ \item{postStrata}{post-stratification information as created by
+ \code{\link{postStratify}} or \code{\link{calibrate}} }
+ \item{lonely.psu}{How to handle strata with a single PSU}
+ \item{one.stage}{If \code{TRUE}, compute a one-stage
+ (ultimate-cluster) estimator}
+}
+\details{
+ The main use of this function is to compute the variance of the sum
+ of a set of estimating functions under multistage sampling. The
+ sampling is assumed to be simple or stratified random sampling within
+ clusters at each stage except perhaps the last stage. The variance of
+ a statistic is computed from the variance of estimating functions as
+ described by Binder (1983).
+
+ Use \code{one.stage=TRUE} for compatibility with other software that
+ does not perform multi-stage calculations, and set
+ \code{options(survey.ultimate.cluster=TRUE)} to make this the default.
+
+ The idea of a recursive algorithm is due to Bellhouse (1985).
+ Texts such as Cochran (1977) and Sarndal et al (1991) describe the
+ decomposition of the variance into a single-stage between-cluster
+ estimator and a within-cluster estimator, and this is applied recursively.
+
+ If \code{one.stage} is a positive integer it specifies the number of
+ stages of sampling to use in the recursive estimator.
+
+ If \code{pps="brewer"} was specified in \code{\link{svydesign}}, standard errors are estimated using Brewer's
+ approximation for PPS without replacement, option 2 of those described
+ by Berger (2004). The \code{fpc} argument must then be specified in
+ terms of sampling fractions, not population sizes (or omitted, but
+ then the \code{pps} argument would have no effect and the
+ with-replacement standard errors would be correct).
+}
+\value{
+ A covariance matrix
+}
+\references{
+  Bellhouse DR (1985) Computing Methods for Variance Estimation in
+  Complex Surveys. Journal of Official Statistics, Vol. 1, No. 3.
+
+ Berger, Y.G. (2004), A Simple Variance Estimator for Unequal
+ Probability Sampling Without Replacement. Journal of Applied
+ Statistics, 31, 305-315.
+
+ Binder, David A. (1983). On the variances of asymptotically normal
+ estimators from complex surveys. International Statistical Review,
+ 51, 279-292.
+
+  Brewer KRW (2002) Combined Survey Sampling Inference (Weighing Basu's
+  Elephants). Chapter 9.
+
+ Cochran, W. (1977) Sampling Techniques. 3rd edition. Wiley.
+
+  Sarndal C-E, Swensson B, Wretman J (1992) Model Assisted Survey
+ Sampling. Springer.
+
+}
+\note{
+ A simple set of finite population corrections will only be exactly
+ correct when each successive stage uses simple or stratified random
+ sampling without replacement. A correction under general unequal
+ probability sampling (eg PPS) would require joint inclusion probabilities (or,
+ at least, sampling probabilities for units not included in the sample),
+ information not generally available.
+
+ The quality of Brewer's approximation is excellent in Berger's
+ simulations, but the accuracy may vary depending on the sampling
+ algorithm used.
+}
+
+
+\seealso{
+ \code{\link{svrVar}} for replicate weight designs
+
+ \code{\link{svyCprod}} for a description of how variances are
+ estimated at each stage
+ }
+ \examples{
+data(mu284)
+dmu284<-svydesign(id=~id1+id2,fpc=~n1+n2, data=mu284)
+svytotal(~y1, dmu284)
+
+
+data(api)
+# two-stage cluster sample
+dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)
+summary(dclus2)
+svymean(~api00, dclus2)
+svytotal(~enroll, dclus2,na.rm=TRUE)
+
+# bootstrap for multistage sample
+mrbclus2<-as.svrepdesign(dclus2, type="mrb", replicates=100)
+svytotal(~enroll, mrbclus2, na.rm=TRUE)
+
+# two-stage `with replacement'
+dclus2wr<-svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
+summary(dclus2wr)
+svymean(~api00, dclus2wr)
+svytotal(~enroll, dclus2wr,na.rm=TRUE)
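+
+# sketch of the documented option: survey.ultimate.cluster=TRUE makes
+# the one-stage (ultimate-cluster) estimator the default
+oldopt<-options(survey.ultimate.cluster=TRUE)
+svymean(~api00, dclus2)
+options(oldopt)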
+
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+
diff --git a/man/svysmooth.Rd b/man/svysmooth.Rd
new file mode 100644
index 0000000..168b62a
--- /dev/null
+++ b/man/svysmooth.Rd
@@ -0,0 +1,83 @@
+\name{svysmooth}
+\alias{svysmooth}
+\alias{svysmooth.default}
+\alias{plot.svysmooth}
+\alias{print.svysmooth}
+\alias{lines.svysmooth}
+\alias{make.panel.svysmooth}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Scatterplot smoothing and density estimation}
+\description{
+Scatterplot smoothing and density estimation for probability-weighted
+data.
+}
+\usage{
+svysmooth(formula, design, ...)
+\method{svysmooth}{default}(formula, design, method = c("locpoly", "quantreg"),
+ bandwidth = NULL, quantile, df = 4, ...)
+\method{plot}{svysmooth}(x, which=NULL, type="l", xlabs=NULL, ylab=NULL,...)
+\method{lines}{svysmooth}(x,which=NULL,...)
+make.panel.svysmooth(design,bandwidth=NULL)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{One-sided formula for density estimation, two-sided for smoothing}
+ \item{design}{Survey design object}
+ \item{method}{local polynomial smoothing for the mean or regression
+ splines for quantiles}
+ \item{bandwidth}{Smoothing bandwidth for "locpoly" or \code{NULL} for automatic choice}
+ \item{quantile}{quantile to be estimated for "quantreg"}
+ \item{df}{Degrees of freedom for "quantreg"}
+ \item{which}{Which plots to show (default is all)}
+ \item{type}{as for \code{plot}}
+ \item{xlabs}{Optional vector of x-axis labels}
+ \item{ylab}{Optional y-axis label}
+ \item{\dots}{More arguments}
+ \item{x}{Object of class \code{svysmooth}}
+}
+\details{
+\code{svysmooth} does one-dimensional smoothing. If \code{formula} has
+multiple predictor variables a separate one-dimensional smooth is
+performed for each one.
+
+For \code{method="locpoly"} the extra arguments are passed to
+\code{locpoly} from the KernSmooth package, for
+\code{method="quantreg"} they are passed to \code{rq} from the
+quantreg package. The automatic choice of bandwidth for
+\code{method="locpoly"} uses the default settings for \code{dpik} and
+\code{dpill} in the KernSmooth package.
+
+\code{make.panel.svysmooth()} makes a function that plots points and
+draws a weighted smooth curve through them, a weighted replacement for
+\code{\link{panel.smooth}} that can be passed to functions such as
+\code{\link{termplot}} or \code{\link{plot.lm}}. The resulting function has a \code{span} argument that will set the bandwidth; if this is not specified the automatic choice will be used.
+}
+\value{
+An object of class \code{svysmooth}, a list of lists, each with \code{x} and \code{y} components.
+}
+
+\seealso{\code{\link{svyhist}} for histograms}
+\examples{
+ data(api)
+ dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+
+ smth<-svysmooth(api00~api99+ell,dstrat)
+ dens<-svysmooth(~api99, dstrat,bandwidth=30)
+ dens1<-svysmooth(~api99, dstrat)
+ qsmth<-svysmooth(api00~ell,dstrat, quantile=0.75, df=3,method="quantreg")
+
+ plot(smth)
+ plot(smth, which="ell",lty=2,ylim=c(500,900))
+ lines(qsmth, col="red")
+
+ svyhist(~api99,design=dstrat)
+ lines(dens,col="purple",lwd=3)
+ lines(dens1, col="forestgreen",lwd=2)
+
+ m<-svyglm(api00~sin(api99/100)+stype, design=dstrat)
+ termplot(m, data=model.frame(dstrat), partial.resid=TRUE, se=TRUE,
+ smooth=make.panel.svysmooth(dstrat))
+}
+\keyword{hplot}% at least one, from doc/KEYWORDS
+\keyword{survey}
+
diff --git a/man/svystandardize.Rd b/man/svystandardize.Rd
new file mode 100644
index 0000000..f0cccf8
--- /dev/null
+++ b/man/svystandardize.Rd
@@ -0,0 +1,64 @@
+\name{svystandardize}
+\alias{svystandardize}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Direct standardization within domains
+}
+\description{
+In health surveys it is often of interest to standardize domains to have the same distribution of, eg, age as in a target population. The operation is similar to post-stratification, except that the totals for the domains are fixed at the current estimates, not at known population values. This function matches the estimates produced by the (US) National Center for Health Statistics.
+}
+\usage{
+svystandardize(design, by, over, population, excluding.missing = NULL)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{design}{
+ survey design object
+}
+ \item{by}{
+A one-sided formula specifying the variables whose distribution will be standardised
+}
+ \item{over}{
+A one-sided formula specifying the domains within which the standardisation will occur
+}
+ \item{population}{
+Desired population totals or proportions for the levels of combinations of variables in \code{by}
+}
+ \item{excluding.missing}{
+ Optionally, a one-sided formula specifying variables whose missing values should be dropped before calculating the domain totals.
+}
+}
+
+\value{
+A new survey design object of the same type as the input.
+}
+\references{
+National Center for Health Statistics \url{http://www.cdc.gov/nchs/tutorials/NHANES/NHANESAnalyses/agestandardization/age_standardization_intro.htm}}
+\note{
+The standard error estimates do not exactly match the NCHS estimates.
+}
+
+
+
+\seealso{
+\code{\link{postStratify}}, \code{\link{svyby}}
+}
+\examples{
+## matches http://www.cdc.gov/nchs/data/databriefs/db92_fig1.png
+data(nhanes)
+popage <- c( 55901 , 77670 , 72816 , 45364 )
+design<-svydesign(id=~SDMVPSU, strata=~SDMVSTRA, weights=~WTMEC2YR, data=nhanes, nest=TRUE)
+stdes<-svystandardize(design, by=~agecat, over=~race+RIAGENDR,
+ population=popage, excluding.missing=~HI_CHOL)
+svyby(~HI_CHOL, ~race+RIAGENDR, svymean, design=subset(stdes, agecat!="(0,19]"))
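+
+## for comparison, a sketch of the unstandardised estimates
+## (na.rm=TRUE is needed because HI_CHOL has missing values)
+svyby(~HI_CHOL, ~race+RIAGENDR, svymean, na.rm=TRUE,
+      design=subset(design, agecat!="(0,19]"))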
+}
+
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/svyttest.Rd b/man/svyttest.Rd
new file mode 100644
index 0000000..c6c9910
--- /dev/null
+++ b/man/svyttest.Rd
@@ -0,0 +1,41 @@
+\name{svyttest}
+\alias{svyttest}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Design-based t-test}
+\description{
+One-sample or two-sample t-test. This function is a wrapper for
+\code{\link{svymean}} in the one-sample case and for
+\code{\link{svyglm}} in the two-sample case. Degrees of freedom are
+\code{degf(design)} for the one-sample test and \code{degf(design)-1}
+for the two-sample case.
+}
+\usage{
+svyttest(formula, design, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{formula}{Formula, \code{outcome~group} for two-sample,
+ \code{outcome~0} or \code{outcome~1} for one-sample }
+ \item{design}{survey design object}
+ \item{\dots}{for methods }
+}
+\value{
+ Object of class \code{htest}
+}
+
+\seealso{ \code{\link{t.test}}}
+\examples{
+data(api)
+dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)
+svyttest(enroll~comp.imp, dclus2)
+
+svyttest(I(api00-api99)~0, dclus2)
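+
+## design degrees of freedom underlying the tests
+degf(dclus2)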
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+\keyword{htest}% __ONLY ONE__ keyword per line
diff --git a/man/trimWeights.Rd b/man/trimWeights.Rd
new file mode 100644
index 0000000..e6af1b9
--- /dev/null
+++ b/man/trimWeights.Rd
@@ -0,0 +1,79 @@
+\name{trimWeights}
+\Rdversion{1.1}
+\alias{trimWeights}
+\alias{trimWeights.svyrep.design}
+\alias{trimWeights.survey.design2}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Trim sampling weights
+}
+\description{
+Trims very high or very low sampling weights to reduce the influence of outlying observations. In a replicate-weight design object, the replicate weights are also trimmed. The total amount trimmed is divided among the observations that were not trimmed, so that the total weight remains the same.
+}
+\usage{
+trimWeights(design, upper = Inf, lower = -Inf, ...)
+\method{trimWeights}{survey.design2}(design, upper = Inf, lower = -Inf, strict=FALSE,...)
+\method{trimWeights}{svyrep.design}(design, upper = Inf, lower = -Inf,compress=FALSE,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{design}{
+ A survey design object
+}
+ \item{upper}{
+ Upper bound for weights
+}
+ \item{lower}{
+ Lower bound for weights
+ }
+  \item{strict}{
+  The reapportionment of the `trimmings' from the weights can push
+  other weights over the limits. If \code{strict=TRUE} the function
+  calls itself recursively to prevent this.
+ }
+ \item{compress}{
+ Compress the replicate weights after trimming.
+ }
+ \item{\dots}{
+ Other arguments for future expansion
+}
+}
+
+\value{
+A new survey design object with trimmed weights.
+}
+
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+\code{\link{calibrate}} has a \code{trim} option for trimming the
+calibration adjustments.
+}
+\examples{
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018,
+api99=3914069)
+dclus1g<-calibrate(dclus1, ~stype+api99, pop.totals)
+
+summary(weights(dclus1g))
+dclus1t<-trimWeights(dclus1g,lower=20, upper=45)
+summary(weights(dclus1t))
+dclus1tt<-trimWeights(dclus1g, lower=20, upper=45,strict=TRUE)
+summary(weights(dclus1tt))
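+
+## the trimmed weight is redistributed, so the total weight is unchanged
+sum(weights(dclus1g))
+sum(weights(dclus1t))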
+
+
+svymean(~api99+api00+stype, dclus1g)
+svymean(~api99+api00+stype, dclus1t)
+svymean(~api99+api00+stype, dclus1tt)
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}
+
diff --git a/man/twophase.Rd b/man/twophase.Rd
new file mode 100644
index 0000000..3a646c4
--- /dev/null
+++ b/man/twophase.Rd
@@ -0,0 +1,199 @@
+\name{twophase}
+\alias{twophase}
+\alias{twophasevar}
+\alias{twophase2var}
+\alias{[.twophase}
+\alias{subset.twophase}
+\alias{print.twophase}
+\alias{summary.twophase}
+\alias{print.summary.twophase}
+\alias{model.frame.twophase}
+\alias{na.fail.twophase}
+\alias{na.omit.twophase}
+\alias{na.exclude.twophase}
+\alias{svyrecvar.phase1}
+\alias{multistage.phase1}
+\alias{onestage.phase1}
+\alias{onestrat.phase1}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Two-phase designs}
+\description{
+In a two-phase design a sample is taken from a population and a
+subsample taken from the sample, typically stratified by variables not
+known for the whole population. The second phase can use any design
+supported for single-phase sampling. The first phase must currently
+be one-stage element or cluster sampling.
+}
+\usage{
+twophase(id, strata = NULL, probs = NULL, weights = NULL, fpc = NULL,
+subset, data, method=c("full","approx","simple"))
+twophasevar(x,design)
+twophase2var(x,design)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{id}{list of two formulas for sampling unit identifiers}
+  \item{strata}{list of two formulas (or \code{NULL}s) for stratum identifiers}
+ \item{probs}{ list of two formulas (or \code{NULL}s) for sampling probabilities}
+ \item{weights}{Only for \code{method="approx"}, list of two formulas (or \code{NULL}s) for sampling weights}
+ \item{fpc}{list of two formulas (or \code{NULL}s) for finite
+ population corrections}
+ \item{subset}{formula specifying which observations are selected in
+ phase 2}
+  \item{data}{Data frame with all data for phases 1 and 2}
+ \item{method}{\code{"full"} requires (much) more memory, but gives unbiased
+ variance estimates for general multistage designs at both phases.
+ \code{"simple"} or \code{"approx"} uses the standard error calculation from
+ version 3.14 and earlier, which uses much less memory and is correct for designs with simple
+ random sampling at phase one and stratified random sampling at phase two.
+ }
+ \item{x}{probability-weighted estimating functions}
+ \item{design}{two-phase design}
+}
+\details{
+ The population for the second phase is the first-phase sample. If the
+ second phase sample uses stratified (multistage cluster) sampling
+ without replacement and all the stratum and sampling unit identifier
+ variables are available for the whole first-phase sample it is
+ possible to estimate the sampling probabilities/weights and the
+ finite population correction. These would then be specified as
+ \code{NULL}.
+
+ Two-phase case-control and case-cohort studies in biostatistics will
+ typically have simple random sampling with replacement as the first
+ stage. Variances given here may differ slightly from those in the
+ biostatistics literature where a model-based estimator of the
+ first-stage variance would typically be used.
+
+ Variance computations are based on the conditioning argument in
+  Section 9.3 of Sarndal et al (1992). Method \code{"full"} corresponds exactly
+ to the formulas in that reference. Method \code{"simple"} or
+ \code{"approx"} (the two are the same) uses less time and memory but
+ is exact only for some special cases. The most important special case
+  is the two-phase epidemiologic design where phase 1 is simple random
+ sampling from an infinite population and phase 2 is stratified random
+ sampling. See the \code{tests} directory for a worked example. The
+  only disadvantage of \code{method="simple"} in these cases is that
+ standardization of margins (\code{\link{marginpred}}) is not available.
+
+
+ For \code{method="full"}, sampling probabilities must be available for
+ each stage of sampling, within each phase. For multistage sampling
+ this requires specifying either \code{fpc} or \code{probs} as a
+ formula with a term for each stage of sampling. If no \code{fpc} or
+ \code{probs} are specified at phase 1 it is treated as simple random
+ sampling from an infinite population, and population totals will not
+ be correctly estimated, but means, quantiles, and regression models
+ will be correct.
+ }
+
+\value{
+
+ \code{twophase} returns an object of class \code{twophase2} (for
+ \code{method="full"}) or \code{twophase}. The structure of
+ \code{twophase2} objects may change as unnecessary components are removed.
+
+ \code{twophase2var} and \code{twophasevar} return a variance matrix with an attribute
+ containing the separate phase 1 and phase 2 contributions to the variance.
+}
+\references{
+Sarndal CE, Swensson B, Wretman J (1992) Model Assisted Survey
+Sampling. Springer.
+
+
+Breslow NE and Chatterjee N (1999) Design and analysis of two-phase
+studies with binary outcome applied to Wilms tumour prognosis. Applied
+Statistics 48:457-68.
+
+Breslow N, Lumley T, Ballantyne CM, Chambless LE, Kulich M (2009)
+Improved Horvitz-Thompson estimation of model parameters from two-phase
+stratified samples: applications in epidemiology. Statistics in
+Biosciences. doi 10.1007/s12561-009-9001-6
+
+
+Lin DY and Ying Z (1993) Cox regression with incomplete covariate measurements.
+Journal of the American Statistical Association 88: 1341-1349.
+}
+
+\seealso{\code{\link{svydesign}}, \code{\link{svyrecvar}} for multistage
+  sampling
+
+ \code{\link{calibrate}} for calibration (GREG) estimators.
+
+ \code{\link{estWeights}} for two-phase designs for missing data.
+
+ The "epi" and "phase1" vignettes for examples and technical details.
+}
+\examples{
+ ## two-phase simple random sampling.
+ data(pbc, package="survival")
+ pbc$randomized<-with(pbc, !is.na(trt) & trt>0)
+ pbc$id<-1:nrow(pbc)
+ d2pbc<-twophase(id=list(~id,~id), data=pbc, subset=~randomized)
+ svymean(~bili, d2pbc)
+
+ ## two-stage sampling as two-phase
+ data(mu284)
+ ii<-with(mu284, c(1:15, rep(1:5,n2[1:5]-3)))
+ mu284.1<-mu284[ii,]
+ mu284.1$id<-1:nrow(mu284.1)
+ mu284.1$sub<-rep(c(TRUE,FALSE),c(15,34-15))
+ dmu284<-svydesign(id=~id1+id2,fpc=~n1+n2, data=mu284)
+ ## first phase cluster sample, second phase stratified within cluster
+ d2mu284<-twophase(id=list(~id1,~id),strata=list(NULL,~id1),
+ fpc=list(~n1,NULL),data=mu284.1,subset=~sub)
+ svytotal(~y1, dmu284)
+ svytotal(~y1, d2mu284)
+ svymean(~y1, dmu284)
+ svymean(~y1, d2mu284)
+
+ ## case-cohort design: this example requires R 2.2.0 or later
+ library("survival")
+ data(nwtco)
+
+ ## stratified on case status
+ dcchs<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
+ subset=~I(in.subcohort | rel), data=nwtco)
+ svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12), design=dcchs)
+
+ ## Using survival::cch
+ subcoh <- nwtco$in.subcohort
+ selccoh <- with(nwtco, rel==1|subcoh==1)
+ ccoh.data <- nwtco[selccoh,]
+ ccoh.data$subcohort <- subcoh[selccoh]
+ cch(Surv(edrel, rel) ~ factor(stage) + factor(histol) + I(age/12), data =ccoh.data,
+ subcoh = ~subcohort, id=~seqno, cohort.size=4028, method="LinYing")
+
+
+ ## two-phase case-control
+ ## Similar to Breslow & Chatterjee, Applied Statistics (1999) but with
+ ## a slightly different version of the data set
+
+ nwtco$incc2<-as.logical(with(nwtco, ifelse(rel | instit==2,1,rbinom(nrow(nwtco),1,.1))))
+ dccs2<-twophase(id=list(~seqno,~seqno),strata=list(NULL,~interaction(rel,instit)),
+ data=nwtco, subset=~incc2)
+ dccs8<-twophase(id=list(~seqno,~seqno),strata=list(NULL,~interaction(rel,stage,instit)),
+ data=nwtco, subset=~incc2)
+ summary(glm(rel~factor(stage)*factor(histol),data=nwtco,family=binomial()))
+ summary(svyglm(rel~factor(stage)*factor(histol),design=dccs2,family=quasibinomial()))
+ summary(svyglm(rel~factor(stage)*factor(histol),design=dccs8,family=quasibinomial()))
+
+ ## Stratification on stage is really post-stratification, so we should use calibrate()
+ gccs8<-calibrate(dccs2, phase=2, formula=~interaction(rel,stage,instit))
+ summary(svyglm(rel~factor(stage)*factor(histol),design=gccs8,family=quasibinomial()))
+
+ ## For this saturated model calibration is equivalent to estimating weights.
+ pccs8<-calibrate(dccs2, phase=2,formula=~interaction(rel,stage,instit), calfun="rrz")
+ summary(svyglm(rel~factor(stage)*factor(histol),design=pccs8,family=quasibinomial()))
+
+ ## Since sampling is SRS at phase 1 and stratified RS at phase 2, we
+ ## can use method="simple" to save memory.
+ dccs8_simple<-twophase(id=list(~seqno,~seqno),strata=list(NULL,~interaction(rel,stage,instit)),
+ data=nwtco, subset=~incc2,method="simple")
+ summary(svyglm(rel~factor(stage)*factor(histol),design=dccs8_simple,family=quasibinomial()))
+
+
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+
diff --git a/man/update.survey.design.Rd b/man/update.survey.design.Rd
new file mode 100755
index 0000000..dd36eed
--- /dev/null
+++ b/man/update.survey.design.Rd
@@ -0,0 +1,58 @@
+\name{update.survey.design}
+\alias{update.survey.design}
+\alias{update.twophase}
+\alias{update.svyrep.design}
+\alias{update.DBIsvydesign}
+\alias{update.ODBCsvydesign}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{ Add variables to a survey design}
+\description{
+Update the data variables in a survey design, either with a formula for a new set of variables or with an expression for variables to be added.
+}
+\usage{
+\method{update}{survey.design}(object, ...)
+\method{update}{twophase}(object, ...)
+\method{update}{svyrep.design}(object, ...)
+\method{update}{DBIsvydesign}(object, ...)
+\method{update}{ODBCsvydesign}(object, ...)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{object}{a survey design object}
+ \item{\dots}{Arguments \code{tag=expr} add a new variable \code{tag}
+ computed by evaluating \code{expr} in the survey data.}
+}
+\details{
+Database-backed objects may not have write access to the database and so
+\code{update} does not attempt to modify the database. The expressions
+are stored and are evaluated when the data is loaded.
+
+If a set of new variables will be used extensively it may be more efficient to
+modify the database, either with SQL queries from the R interface or
+separately. One useful intermediate approach is to create a table with
+the new variables and a view that joins this table to the table of
+existing variables.
+
+There is now a base-R function \code{\link{transform}} for adding new
+ variables to a data frame, so I have added \code{transform} as a synonym for
+ \code{update} for survey objects.
+ }
+\value{
+A survey design object
+}
+
+\seealso{\code{\link{svydesign}}, \code{\link{svrepdesign}}, \code{\link{twophase}}}
+
+\examples{
+data(api)
+dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat,
+fpc=~fpc)
+dstrat<-update(dstrat, apidiff=api00-api99)
+svymean(~api99+api00+apidiff, dstrat)
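+
+## transform() is a synonym for update() on survey objects
+dstrat<-transform(dstrat, apipct=apidiff/api99)
+svymean(~apipct, dstrat)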
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+\keyword{manip}% __ONLY ONE__ keyword per line
diff --git a/man/weights.survey.design.Rd b/man/weights.survey.design.Rd
new file mode 100755
index 0000000..a4e1c31
--- /dev/null
+++ b/man/weights.survey.design.Rd
@@ -0,0 +1,49 @@
+\name{weights.survey.design}
+\alias{weights.survey.design}
+\alias{weights.svyrep.design}
+\alias{weights.survey_fpc}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Survey design weights}
+\description{
+Extract weights from a survey design object.
+}
+\usage{
+\method{weights}{survey.design}(object, ...)
+\method{weights}{svyrep.design}(object,
+type=c("replication","sampling","analysis"), ...)
+\method{weights}{survey_fpc}(object,final=TRUE,...)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{object}{Survey design object}
+ \item{type}{Type of weights: \code{"analysis"} combines sampling and
+ replication weights.}
+ \item{final}{If \code{FALSE} return a data frame with sampling
+ weights at each stage of sampling.}
+ \item{\dots}{Other arguments ignored }
+}
+
+\value{
+ vector or matrix of weights
+}
+
+\seealso{\code{\link{svydesign}}, \code{\link{svrepdesign}},
+ \code{\link{as.fpc}} }
+
+\examples{
+data(scd)
+
+
+scddes<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
+ nest=TRUE, fpc=rep(5,6))
+repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1), c(0,1,0,1,1,0))
+scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)
+
+weights(scdrep)
+weights(scdrep, type="sampling")
+weights(scdrep, type="analysis")
+weights(scddes)
+
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+
diff --git a/man/with.svyimputationList.Rd b/man/with.svyimputationList.Rd
new file mode 100644
index 0000000..b44cca9
--- /dev/null
+++ b/man/with.svyimputationList.Rd
@@ -0,0 +1,58 @@
+\name{with.svyimputationList}
+\alias{with.svyimputationList}
+\alias{subset.svyimputationList}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Analyse multiple imputations}
+\description{
+Performs a survey analysis on each of the designs in a
+\code{svyimputationList} object and returns a list of results suitable
+for \code{MIcombine}. The analysis may be specified as an expression or
+as a function.
+}
+\usage{
+\method{with}{svyimputationList}(data, expr, fun, ...,multicore=getOption("survey.multicore"))
+\method{subset}{svyimputationList}(x, subset,...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{data,x}{A \code{svyimputationList} object }
+ \item{expr}{An expression giving a survey analysis}
+ \item{fun}{A function taking a survey design object as its argument }
+ \item{\dots}{for future expansion }
+  \item{multicore}{Use the \code{multicore} package to distribute imputed data sets over multiple processors?}
+  \item{subset}{A logical expression specifying the subset}
+
+}
+
+\value{
+A list of the results from applying the analysis to each design object.
+}
+
+\seealso{\code{MIcombine}, in the \code{mitools} package }
+\examples{
+library(mitools)
+data.dir<-system.file("dta",package="mitools")
+files.men<-list.files(data.dir,pattern="m.\\\\.dta$",full=TRUE)
+men<-imputationList(lapply(files.men, foreign::read.dta))
+files.women<-list.files(data.dir,pattern="f.\\\\.dta$",full=TRUE)
+women<-imputationList(lapply(files.women, foreign::read.dta))
+men<-update(men, sex=1)
+women<-update(women,sex=0)
+all<-rbind(men,women)
+
+designs<-svydesign(id=~id, strata=~sex, data=all)
+designs
+
+results<-with(designs, svymean(~drkfre))
+
+MIcombine(results)
+
+summary(MIcombine(results))
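+
+## the analysis can also be supplied as a function
+results.fun<-with(designs, fun=function(d) svymean(~drkfre, d))
+summary(MIcombine(results.fun))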
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{survey}% __ONLY ONE__ keyword per line
diff --git a/man/withReplicates.Rd b/man/withReplicates.Rd
new file mode 100755
index 0000000..f130d0d
--- /dev/null
+++ b/man/withReplicates.Rd
@@ -0,0 +1,111 @@
+\name{withReplicates}
+\alias{withReplicates}
+\alias{withReplicates.svyrep.design}
+\alias{withReplicates.svrepvar}
+\alias{withReplicates.svrepstat}
+%- Also NEED an `\alias' for EACH other topic documented here.
+\title{Compute variances by replicate weighting}
+\description{
+Given a function or expression computing a statistic based on sampling
+weights, \code{withReplicates} evaluates the statistic and produces a
+replicate-based estimate of variance.
+}
+\usage{
+withReplicates(design, theta,..., return.replicates=FALSE)
+\method{withReplicates}{svyrep.design}(design, theta, rho = NULL, ...,
+ scale.weights=FALSE, return.replicates=FALSE)
+\method{withReplicates}{svrepvar}(design, theta, ..., return.replicates=FALSE)
+\method{withReplicates}{svrepstat}(design, theta, ..., return.replicates=FALSE)
+}
+%- maybe also `usage' for other objects documented here.
+\arguments{
+ \item{design}{A survey design with replicate weights (eg from \code{\link{svrepdesign}}) or a suitable object with replicate parameter estimates}
+ \item{theta}{A function or expression: see Details below}
+ \item{rho}{If \code{design} uses BRR weights, \code{rho} optionally
+ specifies the parameter for Fay's variance estimator.}
+ \item{\dots}{Other arguments to \code{theta}}
+ \item{scale.weights}{Divide the probability weights by their sum (can
+ help with overflow problems)}
+ \item{return.replicates}{Return the replicate estimates as well as
+ the variance?}
+}
+\details{
+The method for \code{svyrep.design} objects evaluates a function or
+expression using the sampling weights and then each set of replicate
+weights. The method for \code{svrepvar} objects evaluates the function
+or expression on an estimated population covariance matrix and its
+replicates, to simplify multivariate statistics such as structural
+equation models.
+
+For the \code{svyrep.design} method, if \code{theta} is a function its first argument will be a vector of
+ weights and the second argument will be a data frame containing the
+ variables from the design object. If it is an expression, the sampling weights will be available as the
+ variable \code{.weights}. Variables in the design object will also
+ be in scope. It is possible to use global variables in the
+ expression, but unwise, as they may be masked by local variables
+ inside \code{withReplicates}.
+
+For the \code{svrepvar} method a function will get the covariance
+matrix as its first argument, and an expression will be evaluated with
+\code{.replicate} set to the variance matrix.
+
+For the \code{svrepstat} method a function will get the point estimate, and an expression will be evaluated with
+\code{.replicate} set to each replicate. The method can only be used
+when the \code{svrepstat} object includes replicates.
+
+}
+\value{
+ If \code{return.replicates=FALSE}, the weighted statistic, with the
+ variance matrix as the \code{"var"} attribute. If
+ \code{return.replicates=TRUE}, a list with elements \code{theta} for
+ the usual return value and \code{replicates} for the replicates.
+}
+\seealso{ \code{\link{svrepdesign}}, \code{\link{as.svrepdesign}}, \code{\link{svrVar}}}
+
+\examples{
+data(scd)
+repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
+c(0,1,0,1,1,0))
+scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)
+
+a<-svyratio(~alive, ~arrests, design=scdrep)
+print(a$ratio)
+print(a$var)
+withReplicates(scdrep, quote(sum(.weights*alive)/sum(.weights*arrests)))
+withReplicates(scdrep, function(w,data)
+sum(w*data$alive)/sum(w*data$arrests))
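+
+## the variance matrix is attached as the "var" attribute
+b<-withReplicates(scdrep, quote(sum(.weights*alive)/sum(.weights*arrests)))
+attr(b, "var")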
+
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+rclus1<-as.svrepdesign(dclus1)
+varmat<-svyvar(~api00+api99+ell+meals+hsg+mobility,rclus1,return.replicates=TRUE)
+withReplicates(varmat, quote( factanal(covmat=.replicate, factors=2)$unique) )
+
+
+data(nhanes)
+nhanesdesign <- svydesign(id=~SDMVPSU, strata=~SDMVSTRA, weights=~WTMEC2YR, nest=TRUE,data=nhanes)
+logistic <- svyglm(HI_CHOL~race+agecat+RIAGENDR, design=as.svrepdesign(nhanesdesign),
+family=quasibinomial, return.replicates=TRUE)
+fitted<-predict(logistic, return.replicates=TRUE, type="response")
+sensitivity<-function(pred,actual) mean(pred>0.1 & actual)/mean(actual)
+withReplicates(fitted, sensitivity, actual=logistic$y)
+
+\dontrun{
+library(quantreg)
+data(api)
+## one-stage cluster sample
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+## convert to bootstrap
+bclus1<-as.svrepdesign(dclus1,type="bootstrap", replicates=100)
+
+## median regression
+withReplicates(bclus1, quote(coef(rq(api00~api99, tau=0.5, weights=.weights))))
+}
+}
+\keyword{survey}% at least one, from doc/KEYWORDS
+
+
diff --git a/man/yrbs.Rd b/man/yrbs.Rd
new file mode 100644
index 0000000..68efd95
--- /dev/null
+++ b/man/yrbs.Rd
@@ -0,0 +1,46 @@
+\name{yrbs}
+\alias{yrbs}
+\docType{data}
+\title{
+One variable from the Youth Risk Behavior Survey, 2015.
+}
+\description{
+Design information from the Youth Risk Behavior Survey (YRBS), together
+with the single variable `Never/Rarely wore bike helmet'. Used as an
+analysis example by CDC.
+}
+\usage{data("yrbs")}
+\format{
+ A data frame with 15624 observations on the following 4 variables.
+ \describe{
+ \item{\code{weight}}{sampling weights}
+ \item{\code{stratum}}{sampling strata}
+ \item{\code{psu}}{primary sampling units}
+ \item{\code{qn8}}{1=Yes, 2=No}
+ }
+}
+
+\source{
+ \url{ftp://ftp.cdc.gov/pub/data/yrbs/2015smy/} for files
+
+}
+\references{
+Centers for Disease Control and Prevention (2016) Software for Analysis
+of YRBS Data. \url{https://www.cdc.gov/healthyyouth/data/yrbs/pdf/2015/2015_yrbs_analysis_software.pdf}
+}
+\examples{
+data(yrbs)
+
+yrbs_design <- svydesign(id=~psu, weight=~weight, strata=~stratum,
+data=yrbs)
+yrbs_design <- update(yrbs_design, qn8yes=2-qn8)
+
+ci <- svyciprop(~qn8yes, yrbs_design, na.rm=TRUE, method="xlogit")
+ci
+
+## to print more digits: matches SUDAAN and SPSS exactly, per table 3 of reference
+coef(ci)
+SE(ci)
+attr(ci,"ci")
+}
+\keyword{datasets}
diff --git a/tests/DBIcheck.R b/tests/DBIcheck.R
new file mode 100644
index 0000000..71c1192
--- /dev/null
+++ b/tests/DBIcheck.R
@@ -0,0 +1,58 @@
+
+library(survey)
+library(RSQLite)
+
+data(api)
+apiclus1$api_stu<-apiclus1$api.stu
+apiclus1$comp_imp<-apiclus1$comp.imp
+dclus1<-svydesign(id=~dnum, weights=~pw, fpc=~fpc,data=apiclus1)
+dbclus1<-svydesign(id=~dnum, weights=~pw, fpc=~fpc,
+data="apiclus1",dbtype="SQLite", dbname=system.file("api.db",package="survey"))
+
+m<-svymean(~api00+stype,dclus1)
+m.db<-svymean(~api00+stype, dbclus1)
+all.equal(coef(m),coef(m.db))
+all.equal(vcov(m), vcov(m.db))
+
+r<-svyratio(~api_stu, ~enroll, design=dclus1)
+r.db<-svyratio(~api_stu, ~enroll, design=dbclus1)
+all.equal(coef(r), coef(r.db))
+all.equal(SE(r), SE(r.db))
+
+b<-svyby(~api99+api00,~stype, design=dclus1, svymean, deff=TRUE)
+b.db<-svyby(~api99+api00,~stype, design=dbclus1,svymean, deff=TRUE)
+all.equal(coef(b), coef(b.db))
+all.equal(SE(b), SE(b.db))
+all.equal(deff(b), deff(b.db))
+
+l<-svyglm(api00~api99+mobility, design=dclus1)
+l.db<-svyglm(api00~api99+mobility, design=dbclus1)
+all.equal(coef(l),coef(l.db))
+all.equal(vcov(l), vcov(l.db))
+
+dclus1<-update(dclus1, apidiff=api00-api99)
+dclus1<-update(dclus1, apipct= apidiff/api99)
+dbclus1<-update(dbclus1, apidiff=api00-api99)
+dbclus1<-update(dbclus1, apipct= apidiff/api99)
+
+u<-svymean(~api00+apidiff+apipct, dclus1)
+u.db<-svymean(~api00+apidiff+apipct, dbclus1)
+all.equal(u, u.db)
+
+all.equal(nrow(dclus1),nrow(dbclus1))
+all.equal(nrow(subset(dclus1,stype=="E")),
+ nrow(subset(dbclus1,stype=="E")))
+
+## replicate weights
+rclus1<-as.svrepdesign(dclus1)
+db_rclus1<-svrepdesign(weights=~pw, repweights="wt[1-9]+", type="JK1", scale=(1-15/757)*14/15,
+data="apiclus1rep",dbtype="SQLite", dbname=system.file("api.db",package="survey"),combined.weights=FALSE)
+m<-svymean(~api00+api99,rclus1)
+m.db<-svymean(~api00+api99,db_rclus1)
+all.equal(m,m.db)
+
+summary(db_rclus1)
+
+s<-svymean(~api00, subset(rclus1, comp_imp=="Yes"))
+s.db<-svymean(~api00, subset(db_rclus1, comp_imp=="Yes"))
+all.equal(s,s.db)
diff --git a/tests/DBIcheck.Rout.save b/tests/DBIcheck.Rout.save
new file mode 100644
index 0000000..74c360d
--- /dev/null
+++ b/tests/DBIcheck.Rout.save
@@ -0,0 +1,119 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+>
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> library(RSQLite)
+Loading required package: DBI
+>
+> data(api)
+> apiclus1$api_stu<-apiclus1$api.stu
+> apiclus1$comp_imp<-apiclus1$comp.imp
+> dclus1<-svydesign(id=~dnum, weights=~pw, fpc=~fpc,data=apiclus1)
+> dbclus1<-svydesign(id=~dnum, weights=~pw, fpc=~fpc,
++ data="apiclus1",dbtype="SQLite", dbname=system.file("api.db",package="survey"))
+>
+> m<-svymean(~api00+stype,dclus1)
+> m.db<-svymean(~api00+stype, dbclus1)
+> all.equal(coef(m),coef(m.db))
+[1] TRUE
+> all.equal(vcov(m), vcov(m.db))
+[1] TRUE
+>
+> r<-svyratio(~api_stu, ~enroll, design=dclus1)
+> r.db<-svyratio(~api_stu, ~enroll, design=dbclus1)
+> all.equal(coef(r), coef(r.db))
+[1] TRUE
+> all.equal(SE(r), SE(r.db))
+[1] TRUE
+>
+> b<-svyby(~api99+api00,~stype, design=dclus1, svymean, deff=TRUE)
+> b.db<-svyby(~api99+api00,~stype, design=dbclus1,svymean, deff=TRUE)
+> all.equal(coef(b), coef(b.db))
+[1] TRUE
+> all.equal(SE(b), SE(b.db))
+[1] TRUE
+> all.equal(deff(b), deff(b.db))
+[1] TRUE
+>
+> l<-svyglm(api00~api99+mobility, design=dclus1)
+> l.db<-svyglm(api00~api99+mobility, design=dbclus1)
+> all.equal(coef(l),coef(l.db))
+[1] TRUE
+> all.equal(vcov(l), vcov(l.db))
+[1] TRUE
+>
+> dclus1<-update(dclus1, apidiff=api00-api99)
+> dclus1<-update(dclus1, apipct= apidiff/api99)
+> dbclus1<-update(dbclus1, apidiff=api00-api99)
+> dbclus1<-update(dbclus1, apipct= apidiff/api99)
+>
+> u<-svymean(~api00+apidiff+apipct, dclus1)
+> u.db<-svymean(~api00+apidiff+apipct, dbclus1)
+> all.equal(u, u.db)
+[1] TRUE
+>
+> all.equal(nrow(dclus1),nrow(dbclus1))
+[1] TRUE
+> all.equal(nrow(subset(dclus1,stype=="E")),
++ nrow(subset(dbclus1,stype=="E")))
+[1] TRUE
+>
+> ## replicate weights
+> rclus1<-as.svrepdesign(dclus1)
+> db_rclus1<-svrepdesign(weights=~pw, repweights="wt[1-9]+", type="JK1", scale=(1-15/757)*14/15,
++ data="apiclus1rep",dbtype="SQLite", dbname=system.file("api.db",package="survey"),combined.weights=FALSE)
+> m<-svymean(~api00+api99,rclus1)
+> m.db<-svymean(~api00+api99,db_rclus1)
+> all.equal(m,m.db)
+[1] TRUE
+>
+> summary(db_rclus1)
+DB-backed replicate weight design
+Call: svrepdesign(weights = ~pw, repweights = "wt[1-9]+", type = "JK1",
+ scale = (1 - 15/757) * 14/15, data = "apiclus1rep", dbtype = "SQLite",
+ dbname = system.file("api.db", package = "survey"), combined.weights = FALSE)
+Unstratified cluster jacknife (JK1) with 15 replicates.
+Variables:
+ [1] "row_names" "cds" "stype" "name" "sname"
+ [6] "snum" "dname" "dnum" "cname" "cnum"
+[11] "flag" "pcttest" "api00" "api99" "target"
+[16] "growth" "sch_wide" "comp_imp" "both" "awards"
+[21] "meals" "ell" "yr_rnd" "mobility" "acs_k3"
+[26] "acs_46" "acs_core" "pct_resp" "not_hsg" "hsg"
+[31] "some_col" "col_grad" "grad_sch" "avg_ed" "full__1"
+[36] "emer" "enroll" "api_stu" "fpc" "pw"
+[41] "row_names:1" "wt1" "wt2" "wt3" "wt4"
+[46] "wt5" "wt6" "wt7" "wt8" "wt9"
+[51] "wt10" "wt11" "wt12" "wt13" "wt14"
+[56] "wt15"
+>
+> s<-svymean(~api00, subset(rclus1, comp_imp=="Yes"))
+> s.db<-svymean(~api00, subset(db_rclus1, comp_imp=="Yes"))
+> all.equal(s,s.db)
+[1] TRUE
+>
+> proc.time()
+ user system elapsed
+ 0.497 0.036 0.550
diff --git a/tests/README b/tests/README
new file mode 100644
index 0000000..ddbf613
--- /dev/null
+++ b/tests/README
@@ -0,0 +1,54 @@
+api.R: Run example(api) to check that results haven't changed
+
+bycovmat.R: Check that svyby(,covmat=TRUE) is getting the ordering
+ of estimates correct.
+
+caleg.R: Calibration examples
+ - calibration to information on PSUs rather than population
+ - check that bounded weights really are bounded
+ - check that linear calibration with error proportional to
+ x agrees with ratio estimators
+
+check.R: Many combinations of options for svydesign
+
+deff.R: Regression test on design effects, especially for totals
+
+DBIcheck.R: Check that we get the same results for SQLite-backed and
+ in-memory versions of the API data.
+
+domain.R: Check that domain estimators of means and their standard
+ errors agree with derivations as ratio and regression
+ estimators. Repeat for calibrated and raked designs
+
+fpc.R: Many ways to specify fpc
+
+kalton.R: Calibration examples from Kalton & Flores-Cervantes,
+           J Off Stat, 19(2):81-97
+
+lonely.psu.R: All the lonely PSU options
+
+multistage.R: Check that a two-stage cluster sample analysis agrees with
+           the hand-calculated result in Sarndal et al.
+
+nwts.R: Compare results from twophase() to published two-phase
+ case-control example
+
+nwts-cch.R: Compare results from twophase() to case-cohort analyses in
+ survival package.
+
+pps.R: Brewer's approximation for pps without replacement
+
+quantile.R: quantile estimation on a lognormal sample
+
+rakecheck.R: check that raking by iterative post-stratification agrees with
+ raking using calibrate()
+
+regpredict.R: ratio and regression estimation of a total.
+
+scoping.R: check that svyglm and svycoxph work inside functions.
+
+survcurve.R: check that svykm and predict.coxph give the same result
+ when a data set is doubled and the two replicates of each
+ observation are treated as a cluster.
+
+twophase.R: separately verifiable examples of twophase studies
diff --git a/tests/api.R b/tests/api.R
new file mode 100644
index 0000000..a3de0a3
--- /dev/null
+++ b/tests/api.R
@@ -0,0 +1,6 @@
+library(survey)
+options(survey.replicates.mse=TRUE)
+example(api)
+
+options(survey.replicates.mse=FALSE)
+example(api)
diff --git a/tests/api.Rout.save b/tests/api.Rout.save
new file mode 100644
index 0000000..8c336c6
--- /dev/null
+++ b/tests/api.Rout.save
@@ -0,0 +1,440 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> options(survey.replicates.mse=TRUE)
+> example(api)
+
+api> library(survey)
+
+api> data(api)
+
+api> mean(apipop$api00)
+[1] 664.7126
+
+api> sum(apipop$enroll, na.rm=TRUE)
+[1] 3811472
+
+api> #stratified sample
+api> dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+
+api> summary(dstrat)
+Stratified Independent Sampling design
+svydesign(id = ~1, strata = ~stype, weights = ~pw, data = apistrat,
+ fpc = ~fpc)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.02262 0.02262 0.03587 0.04014 0.05339 0.06623
+Stratum Sizes:
+ E H M
+obs 100 50 50
+design.PSU 100 50 50
+actual.PSU 100 50 50
+Population stratum sizes (PSUs):
+ E H M
+4421 755 1018
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "pw" "fpc"
+
+api> svymean(~api00, dstrat)
+ mean SE
+api00 662.29 9.4089
+
+api> svytotal(~enroll, dstrat, na.rm=TRUE)
+ total SE
+enroll 3687178 114642
+
+api> # one-stage cluster sample
+api> dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+api> summary(dclus1)
+1 - level Cluster Sampling design
+With (15) clusters.
+svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.02954 0.02954 0.02954 0.02954 0.02954 0.02954
+Population size (PSUs): 757
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+
+api> svymean(~api00, dclus1)
+ mean SE
+api00 644.17 23.542
+
+api> svytotal(~enroll, dclus1, na.rm=TRUE)
+ total SE
+enroll 3404940 932235
+
+api> # two-stage cluster sample
+api> dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)
+
+api> summary(dclus2)
+2 - level Cluster Sampling design
+With (40, 126) clusters.
+svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.003669 0.037740 0.052840 0.042390 0.052840 0.052840
+Population size (PSUs): 757
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "pw" "fpc1" "fpc2"
+
+api> svymean(~api00, dclus2)
+ mean SE
+api00 670.81 30.099
+
+api> svytotal(~enroll, dclus2, na.rm=TRUE)
+ total SE
+enroll 2639273 799638
+
+api> # two-stage `with replacement'
+api> dclus2wr<-svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
+
+api> summary(dclus2wr)
+2 - level Cluster Sampling design (with replacement)
+With (40, 126) clusters.
+svydesign(id = ~dnum + snum, weights = ~pw, data = apiclus2)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.003669 0.037740 0.052840 0.042390 0.052840 0.052840
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "pw" "fpc1" "fpc2"
+
+api> svymean(~api00, dclus2wr)
+ mean SE
+api00 670.81 30.712
+
+api> svytotal(~enroll, dclus2wr, na.rm=TRUE)
+ total SE
+enroll 2639273 820261
+
+api> # convert to replicate weights
+api> rclus1<-as.svrepdesign(dclus1)
+
+api> summary(rclus1)
+Call: as.svrepdesign(dclus1)
+Unstratified cluster jacknife (JK1) with 15 replicates and MSE variances.
+Variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+
+api> svymean(~api00, rclus1)
+ mean SE
+api00 644.17 26.335
+
+api> svytotal(~enroll, rclus1, na.rm=TRUE)
+ total SE
+enroll 3404940 932235
+
+api> # post-stratify on school type
+api> pop.types<-xtabs(~stype, data=apipop)
+
+api> rclus1p<-postStratify(rclus1, ~stype, pop.types)
+
+api> dclus1p<-postStratify(dclus1, ~stype, pop.types)
+
+api> summary(dclus1p)
+1 - level Cluster Sampling design
+With (15) clusters.
+postStratify(dclus1, ~stype, pop.types)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.01854 0.03257 0.03257 0.03040 0.03257 0.03257
+Population size (PSUs): 757
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+
+api> summary(rclus1p)
+Call: postStratify(rclus1, ~stype, pop.types)
+Unstratified cluster jacknife (JK1) with 15 replicates and MSE variances.
+Variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+
+api> svymean(~api00, dclus1p)
+ mean SE
+api00 642.31 23.921
+
+api> svytotal(~enroll, dclus1p, na.rm=TRUE)
+ total SE
+enroll 3680893 406293
+
+api> svymean(~api00, rclus1p)
+ mean SE
+api00 642.31 26.936
+
+api> svytotal(~enroll, rclus1p, na.rm=TRUE)
+ total SE
+enroll 3680893 473434
+>
+> options(survey.replicates.mse=FALSE)
+> example(api)
+
+api> library(survey)
+
+api> data(api)
+
+api> mean(apipop$api00)
+[1] 664.7126
+
+api> sum(apipop$enroll, na.rm=TRUE)
+[1] 3811472
+
+api> #stratified sample
+api> dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+
+api> summary(dstrat)
+Stratified Independent Sampling design
+svydesign(id = ~1, strata = ~stype, weights = ~pw, data = apistrat,
+ fpc = ~fpc)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.02262 0.02262 0.03587 0.04014 0.05339 0.06623
+Stratum Sizes:
+ E H M
+obs 100 50 50
+design.PSU 100 50 50
+actual.PSU 100 50 50
+Population stratum sizes (PSUs):
+ E H M
+4421 755 1018
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "pw" "fpc"
+
+api> svymean(~api00, dstrat)
+ mean SE
+api00 662.29 9.4089
+
+api> svytotal(~enroll, dstrat, na.rm=TRUE)
+ total SE
+enroll 3687178 114642
+
+api> # one-stage cluster sample
+api> dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+
+api> summary(dclus1)
+1 - level Cluster Sampling design
+With (15) clusters.
+svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.02954 0.02954 0.02954 0.02954 0.02954 0.02954
+Population size (PSUs): 757
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+
+api> svymean(~api00, dclus1)
+ mean SE
+api00 644.17 23.542
+
+api> svytotal(~enroll, dclus1, na.rm=TRUE)
+ total SE
+enroll 3404940 932235
+
+api> # two-stage cluster sample
+api> dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)
+
+api> summary(dclus2)
+2 - level Cluster Sampling design
+With (40, 126) clusters.
+svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.003669 0.037740 0.052840 0.042390 0.052840 0.052840
+Population size (PSUs): 757
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "pw" "fpc1" "fpc2"
+
+api> svymean(~api00, dclus2)
+ mean SE
+api00 670.81 30.099
+
+api> svytotal(~enroll, dclus2, na.rm=TRUE)
+ total SE
+enroll 2639273 799638
+
+api> # two-stage `with replacement'
+api> dclus2wr<-svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
+
+api> summary(dclus2wr)
+2 - level Cluster Sampling design (with replacement)
+With (40, 126) clusters.
+svydesign(id = ~dnum + snum, weights = ~pw, data = apiclus2)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.003669 0.037740 0.052840 0.042390 0.052840 0.052840
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "pw" "fpc1" "fpc2"
+
+api> svymean(~api00, dclus2wr)
+ mean SE
+api00 670.81 30.712
+
+api> svytotal(~enroll, dclus2wr, na.rm=TRUE)
+ total SE
+enroll 2639273 820261
+
+api> # convert to replicate weights
+api> rclus1<-as.svrepdesign(dclus1)
+
+api> summary(rclus1)
+Call: as.svrepdesign(dclus1)
+Unstratified cluster jacknife (JK1) with 15 replicates.
+Variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+
+api> svymean(~api00, rclus1)
+ mean SE
+api00 644.17 26.329
+
+api> svytotal(~enroll, rclus1, na.rm=TRUE)
+ total SE
+enroll 3404940 932235
+
+api> # post-stratify on school type
+api> pop.types<-xtabs(~stype, data=apipop)
+
+api> rclus1p<-postStratify(rclus1, ~stype, pop.types)
+
+api> dclus1p<-postStratify(dclus1, ~stype, pop.types)
+
+api> summary(dclus1p)
+1 - level Cluster Sampling design
+With (15) clusters.
+postStratify(dclus1, ~stype, pop.types)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.01854 0.03257 0.03257 0.03040 0.03257 0.03257
+Population size (PSUs): 757
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+
+api> summary(rclus1p)
+Call: postStratify(rclus1, ~stype, pop.types)
+Unstratified cluster jacknife (JK1) with 15 replicates.
+Variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+
+api> svymean(~api00, dclus1p)
+ mean SE
+api00 642.31 23.921
+
+api> svytotal(~enroll, dclus1p, na.rm=TRUE)
+ total SE
+enroll 3680893 406293
+
+api> svymean(~api00, rclus1p)
+ mean SE
+api00 642.31 26.934
+
+api> svytotal(~enroll, rclus1p, na.rm=TRUE)
+ total SE
+enroll 3680893 473431
+>
+> proc.time()
+ user system elapsed
+ 0.560 0.033 0.608
diff --git a/tests/badcal.R b/tests/badcal.R
new file mode 100644
index 0000000..4d28820
--- /dev/null
+++ b/tests/badcal.R
@@ -0,0 +1,9 @@
+##
+## Calibration with badly-scaled initial weights (bug report by Takahiro Tsuchiya)
+##
+library(survey)
+data <- data.frame(x=c(1,1,1,1,2,2,2,2,2,2), w=rep(10,10))
+des <- svydesign(ids=~1, weights=~w, data=data)
+des.c <- calibrate(des, ~factor(x), c(10000, 5000))
+des.r <- calibrate(des, ~factor(x), c(10000, 5000), calfun='raking')
+stopifnot(all.equal(svytotal(~factor(x), des.c), svytotal(~factor(x), des.r)))
diff --git a/tests/badcal.Rout.save b/tests/badcal.Rout.save
new file mode 100644
index 0000000..5271124
--- /dev/null
+++ b/tests/badcal.Rout.save
@@ -0,0 +1,38 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> ##
+> ## Calibration with badly-scaled initial weights (bug report by Takahiro Tsuchiya)
+> ##
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> data <- data.frame(x=c(1,1,1,1,2,2,2,2,2,2), w=rep(10,10))
+> des <- svydesign(ids=~1, weights=~w, data=data)
+> des.c <- calibrate(des, ~factor(x), c(10000, 5000))
+> des.r <- calibrate(des, ~factor(x), c(10000, 5000), calfun='raking')
+Loading required package: MASS
+> stopifnot(all.equal(svytotal(~factor(x), des.c), svytotal(~factor(x), des.r)))
+>
+> proc.time()
+ user system elapsed
+ 0.162 0.025 0.194
diff --git a/tests/bycovmat.R b/tests/bycovmat.R
new file mode 100644
index 0000000..d16cfd1
--- /dev/null
+++ b/tests/bycovmat.R
@@ -0,0 +1,78 @@
+
+library(survey)
+data(api)
+options(survey.replicates.mse=TRUE)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+rclus1<-as.svrepdesign(dclus1)
+
+a<-svyby(~api00+api99, ~comp.imp+sch.wide,design=rclus1,svymean,
+ covmat=TRUE,drop.empty.groups=FALSE)
+b<-svyby(~api00+api99, ~comp.imp+sch.wide,design=rclus1,svymean,
+ covmat=TRUE,drop.empty.groups=TRUE)
+
+stopifnot(all(na.omit(
+ as.vector(as.matrix(SE(a)))==sqrt(diag(vcov(a)))
+)))
+stopifnot(all(
+ as.vector(as.matrix(SE(b)))==sqrt(diag(vcov(b)))
+ ))
+
+rat <- svyratio(~ell+mobility, ~mobility+meals, dclus1,covmat=TRUE)
+all <- svytotal(~ell+mobility+meals, dclus1)
+
+stopifnot(all(abs(vcov(svycontrast(all,
+ list(quote(ell/mobility),
+ quote(mobility/mobility),
+ quote(ell/meals),quote(mobility/meals))))
+ -vcov(rat))<1e-10))
+
+stopifnot(all(abs(SE(rat)-sqrt(diag(vcov(rat))))<1e-10))
+
+rat <- svyratio(~ell+mobility, ~mobility+meals, rclus1,covmat=TRUE)
+all <- svytotal(~ell+mobility+meals, rclus1, return.replicates=TRUE)
+
+con<-svycontrast(all,
+ list(quote(ell/mobility),
+ quote(mobility/mobility),
+ quote(ell/meals),quote(mobility/meals)))
+
+stopifnot(all(abs(survey:::svrVar(con$replicates, rclus1$scale,rclus1$rscales,mse=rclus1$mse, coef=coef(con))-vcov(rat))<1e-10))
+
+options(survey.replicates.mse=FALSE)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+rclus1<-as.svrepdesign(dclus1)
+
+a<-svyby(~api00+api99, ~comp.imp+sch.wide,design=rclus1,svymean,
+ covmat=TRUE,drop.empty.groups=FALSE)
+b<-svyby(~api00+api99, ~comp.imp+sch.wide,design=rclus1,svymean,
+ covmat=TRUE,drop.empty.groups=TRUE)
+
+stopifnot(all(na.omit(
+ as.vector(as.matrix(SE(a)))==sqrt(diag(vcov(a)))
+)))
+stopifnot(all(
+ as.vector(as.matrix(SE(b)))==sqrt(diag(vcov(b)))
+ ))
+
+rat <- svyratio(~ell+mobility, ~mobility+meals, dclus1,covmat=TRUE)
+all <- svytotal(~ell+mobility+meals, dclus1)
+
+stopifnot(all(abs(vcov(svycontrast(all,
+ list(quote(ell/mobility),
+ quote(mobility/mobility),
+ quote(ell/meals),quote(mobility/meals))))
+ -vcov(rat))<1e-10))
+
+stopifnot(all(abs(SE(rat)-sqrt(diag(vcov(rat))))<1e-10))
+
+rat <- svyratio(~ell+mobility, ~mobility+meals, rclus1,covmat=TRUE)
+all <- svytotal(~ell+mobility+meals, rclus1, return.replicates=TRUE)
+
+con<-svycontrast(all,
+ list(quote(ell/mobility),
+ quote(mobility/mobility),
+ quote(ell/meals),quote(mobility/meals)))
+
+stopifnot(all(abs(survey:::svrVar(con$replicates, rclus1$scale,rclus1$rscales,mse=rclus1$mse, coef=coef(con))-vcov(rat))<1e-10))
+
+
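The svycontrast() comparisons in this test work because a ratio of estimated totals is a smooth function of those totals, so its linearisation variance is the delta-method variance computed from the covariance of the totals. A minimal sketch of the one-ratio case, assuming the api data as above:

    library(survey)
    data(api)
    dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
    tot <- svytotal(~ell+mobility, dclus1)
    ## delta-method ratio of totals; should match svyratio()
    svycontrast(tot, quote(ell/mobility))
    svyratio(~ell, ~mobility, dclus1)
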
diff --git a/tests/bycovmat.Rout.save b/tests/bycovmat.Rout.save
new file mode 100644
index 0000000..c6ef0c8
--- /dev/null
+++ b/tests/bycovmat.Rout.save
@@ -0,0 +1,106 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+>
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> data(api)
+> options(survey.replicates.mse=TRUE)
+> dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+> rclus1<-as.svrepdesign(dclus1)
+>
+> a<-svyby(~api00+api99, ~comp.imp+sch.wide,design=rclus1,svymean,
++ covmat=TRUE,drop.empty.groups=FALSE)
+> b<-svyby(~api00+api99, ~comp.imp+sch.wide,design=rclus1,svymean,
++ covmat=TRUE,drop.empty.groups=TRUE)
+>
+> stopifnot(all(na.omit(
++ as.vector(as.matrix(SE(a)))==sqrt(diag(vcov(a)))
++ )))
+> stopifnot(all(
++ as.vector(as.matrix(SE(b)))==sqrt(diag(vcov(b)))
++ ))
+>
+> rat <- svyratio(~ell+mobility, ~mobility+meals, dclus1,covmat=TRUE)
+> all <- svytotal(~ell+mobility+meals, dclus1)
+>
+> stopifnot(all(abs(vcov(svycontrast(all,
++ list(quote(ell/mobility),
++ quote(mobility/mobility),
++ quote(ell/meals),quote(mobility/meals))))
++ -vcov(rat))<1e-10))
+>
+> stopifnot(all(abs(SE(rat)-sqrt(diag(vcov(rat))))<1e-10))
+>
+> rat <- svyratio(~ell+mobility, ~mobility+meals, rclus1,covmat=TRUE)
+> all <- svytotal(~ell+mobility+meals, rclus1, return.replicates=TRUE)
+>
+> con<-svycontrast(all,
++ list(quote(ell/mobility),
++ quote(mobility/mobility),
++ quote(ell/meals),quote(mobility/meals)))
+>
+> stopifnot(all(abs(survey:::svrVar(con$replicates, rclus1$scale,rclus1$rscales,mse=rclus1$mse, coef=coef(con))-vcov(rat))<1e-10))
+>
+> options(survey.replicates.mse=FALSE)
+> dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+> rclus1<-as.svrepdesign(dclus1)
+>
+> a<-svyby(~api00+api99, ~comp.imp+sch.wide,design=rclus1,svymean,
++ covmat=TRUE,drop.empty.groups=FALSE)
+> b<-svyby(~api00+api99, ~comp.imp+sch.wide,design=rclus1,svymean,
++ covmat=TRUE,drop.empty.groups=TRUE)
+>
+> stopifnot(all(na.omit(
++ as.vector(as.matrix(SE(a)))==sqrt(diag(vcov(a)))
++ )))
+> stopifnot(all(
++ as.vector(as.matrix(SE(b)))==sqrt(diag(vcov(b)))
++ ))
+>
+> rat <- svyratio(~ell+mobility, ~mobility+meals, dclus1,covmat=TRUE)
+> all <- svytotal(~ell+mobility+meals, dclus1)
+>
+> stopifnot(all(abs(vcov(svycontrast(all,
++ list(quote(ell/mobility),
++ quote(mobility/mobility),
++ quote(ell/meals),quote(mobility/meals))))
++ -vcov(rat))<1e-10))
+>
+> stopifnot(all(abs(SE(rat)-sqrt(diag(vcov(rat))))<1e-10))
+>
+> rat <- svyratio(~ell+mobility, ~mobility+meals, rclus1,covmat=TRUE)
+> all <- svytotal(~ell+mobility+meals, rclus1, return.replicates=TRUE)
+>
+> con<-svycontrast(all,
++ list(quote(ell/mobility),
++ quote(mobility/mobility),
++ quote(ell/meals),quote(mobility/meals)))
+>
+> stopifnot(all(abs(survey:::svrVar(con$replicates, rclus1$scale,rclus1$rscales,mse=rclus1$mse, coef=coef(con))-vcov(rat))<1e-10))
+>
+>
+>
+> proc.time()
+ user system elapsed
+ 0.299 0.027 0.333
diff --git a/tests/caleg.R b/tests/caleg.R
new file mode 100644
index 0000000..06b5c10
--- /dev/null
+++ b/tests/caleg.R
@@ -0,0 +1,75 @@
+##
+## Calibration examples
+##
+
+
+## Example of calibration to first-stage clusters
+library(survey)
+data(api)
+
+clusters<-table(apiclus2$dnum)
+clusters<-clusters[clusters>1 & names(clusters)!="639"]
+apiclus2a<-subset(apiclus2, dnum %in% as.numeric(names(clusters)))
+
+dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2a)
+
+popclusters<-subset(apipop, dnum %in% as.numeric(names(clusters)))
+
+pop<-lapply(as.numeric(names(clusters)), function(cluster) {
+ colSums(model.matrix(~api99, model.frame(~api99, subset(popclusters, dnum %in% cluster))))})
+
+names(pop)<-names(clusters)
+
+dclus2g<-calibrate(dclus2, ~api99, pop,stage=1)
+
+svymean(~api99, dclus2)
+svymean(~api99, dclus2g)
+
+round(svyby(~api99, ~dnum, design=dclus2, svymean),4)
+
+round(svyby(~api99, ~dnum, design=dclus2g, svymean),4)
+
+## Averaging to first stage
+
+dclus1<- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
+pop<-colSums(cbind(1,apipop$enroll),na.rm=TRUE)
+
+dclus1g<-calibrate(dclus1, ~enroll, pop, aggregate=1)
+
+svytotal(~enroll,dclus1g)
+svytotal(~api.stu,dclus1g)
+
+#variation within clusters should be zero
+all.equal(0, max(ave(weights(dclus1g),dclus1g$cluster,FUN=var),na.rm=TRUE))
+
+##bounded weights
+ dclus1g<-calibrate(dclus1, ~enroll, pop)
+ range(weights(dclus1g)/weights(dclus1))
+ dclus1gb<-calibrate(dclus1, ~enroll, pop, bounds=c(.6,1.5))
+ range(weights(dclus1gb)/weights(dclus1))
+
+## Ratio estimators
+dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+svytotal(~api.stu,dstrat)
+common<-svyratio(~api.stu, ~enroll, dstrat, separate=FALSE)
+total.enroll<-sum(apipop$enroll,na.rm=TRUE)
+predict(common, total=total.enroll)
+dstratg<-calibrate(dstrat,~enroll-1, total.enroll, variance=1)
+svytotal(~api.stu, dstratg)
+
+## postStratify vs calibrate in stratified sample (Ben French)
+
+set.seed(17)
+dat<-data.frame(y=rep(0:1,each=100),x=rnorm(200)+2*rep(0:1,each=100),
+ z=rbinom(200,1,.2), fpc=rep(c(100,10000),each=100))
+dat$w<-ifelse(dat$y,dat$z,1-dat$z)
+popw<-data.frame(w=c("0","1"), Freq=c(2000,8000))
+ des<-svydesign(id=~1,fpc=~fpc, data=dat,strata=~y)
+postStratify(des,~w,popw)->dps
+dcal<-calibrate(des,~factor(w), pop=c(10000,8000))
+
+all.equal(SE(svymean(~x,dcal)),SE(svymean(~x,dps)))
+
+## missing data in calibrated design
+dps$variables$z[1]<-NA
+summary(svyglm(y~z+x,design=dps,family=quasibinomial))
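
The ratio-estimator block above exercises a classical identity: calibrating on a single covariate with no intercept and working variance proportional to that covariate multiplies every weight by the same factor, the known total over the estimated total, which is exactly the ratio estimator. A minimal sketch of the constant g-weight, under the same api setup:

    library(survey)
    data(api)
    dstrat <- svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
    total.enroll <- sum(apipop$enroll, na.rm=TRUE)
    dstratg <- calibrate(dstrat, ~enroll-1, total.enroll, variance=1)
    ## the g-weights should be a single constant equal to T_x / That_x
    range(weights(dstratg)/weights(dstrat))
    total.enroll/coef(svytotal(~enroll, dstrat))
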
diff --git a/tests/caleg.Rout.save b/tests/caleg.Rout.save
new file mode 100644
index 0000000..7b9a344
--- /dev/null
+++ b/tests/caleg.Rout.save
@@ -0,0 +1,207 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> ##
+> ## Calibration examples
+> ##
+>
+>
+> ## Example of calibration to first-stage clusters
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> data(api)
+>
+> clusters<-table(apiclus2$dnum)
+> clusters<-clusters[clusters>1 & names(clusters)!="639"]
+> apiclus2a<-subset(apiclus2, dnum %in% as.numeric(names(clusters)))
+>
+> dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2a)
+>
+> popclusters<-subset(apipop, dnum %in% as.numeric(names(clusters)))
+>
+> pop<-lapply(as.numeric(names(clusters)), function(cluster) {
++ colSums(model.matrix(~api99, model.frame(~api99, subset(popclusters, dnum %in% cluster))))})
+>
+> names(pop)<-names(clusters)
+>
+> dclus2g<-calibrate(dclus2, ~api99, pop,stage=1)
+>
+> svymean(~api99, dclus2)
+ mean SE
+api99 642.14 31.434
+> svymean(~api99, dclus2g)
+ mean SE
+api99 654.49 29.82
+>
+> round(svyby(~api99, ~dnum, design=dclus2, svymean),4)
+ dnum api99 se
+83 83 694.3333 0.0000
+132 132 505.0000 0.0000
+152 152 574.0000 0.0000
+173 173 894.7500 0.0000
+198 198 533.7500 0.0000
+200 200 589.8000 6.8335
+228 228 477.0000 0.0000
+295 295 646.4000 0.0000
+302 302 903.5000 0.0000
+403 403 852.4000 0.0000
+452 452 533.0000 0.0000
+480 480 614.2000 0.0000
+523 523 580.5000 0.0000
+534 534 564.6000 0.0000
+549 549 896.2000 0.0000
+552 552 730.0000 0.0000
+570 570 518.4000 7.5478
+575 575 800.8000 4.2513
+596 596 785.6000 2.4155
+620 620 591.6000 10.5869
+638 638 560.2000 4.0954
+674 674 760.0000 0.0000
+679 679 610.2500 0.0000
+687 687 718.6667 0.0000
+701 701 651.5000 0.0000
+711 711 690.5000 0.0000
+731 731 702.0000 2.1744
+768 768 562.5000 0.0000
+781 781 854.4000 0.7456
+>
+> round(svyby(~api99, ~dnum, design=dclus2g, svymean),4)
+ dnum api99 se
+83 83 694.3333 0
+132 132 505.0000 0
+152 152 574.0000 0
+173 173 894.7500 0
+198 198 533.7500 0
+200 200 567.5455 0
+228 228 477.0000 0
+295 295 646.4000 0
+302 302 903.5000 0
+403 403 852.4000 0
+452 452 533.0000 0
+480 480 614.2000 0
+523 523 580.5000 0
+534 534 564.6000 0
+549 549 896.2000 0
+552 552 730.0000 0
+570 570 548.9444 0
+575 575 824.5357 0
+596 596 787.5714 0
+620 620 609.3750 0
+638 638 585.6429 0
+674 674 760.0000 0
+679 679 610.2500 0
+687 687 718.6667 0
+701 701 651.5000 0
+711 711 690.5000 0
+731 731 700.6667 0
+768 768 562.5000 0
+781 781 851.0000 0
+>
+> ## Averaging to first stage
+>
+> dclus1<- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
+> pop<-colSums(cbind(1,apipop$enroll),na.rm=TRUE)
+>
+> dclus1g<-calibrate(dclus1, ~enroll, pop, aggregate=1)
+>
+> svytotal(~enroll,dclus1g)
+ total SE
+enroll 3811472 0
+> svytotal(~api.stu,dclus1g)
+ total SE
+api.stu 3242857 38967
+>
+> #variation within clusters should be zero
+> all.equal(0, max(ave(weights(dclus1g),dclus1g$cluster,FUN=var),na.rm=TRUE))
+[1] TRUE
+>
+> ##bounded weights
+> dclus1g<-calibrate(dclus1, ~enroll, pop)
+> range(weights(dclus1g)/weights(dclus1))
+[1] 0.7906782 1.7891164
+> dclus1gb<-calibrate(dclus1, ~enroll, pop, bounds=c(.6,1.5))
+Loading required package: MASS
+> range(weights(dclus1gb)/weights(dclus1))
+[1] 0.7198751 1.5000000
+>
+> ## Ratio estimators
+> dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+> svytotal(~api.stu,dstrat)
+ total SE
+api.stu 3086009 99477
+> common<-svyratio(~api.stu, ~enroll, dstrat, separate=FALSE)
+> total.enroll<-sum(apipop$enroll,na.rm=TRUE)
+> predict(common, total=total.enroll)
+$total
+ enroll
+api.stu 3190038
+
+$se
+ enroll
+api.stu 29565.98
+
+> dstratg<-calibrate(dstrat,~enroll-1, total.enroll, variance=1)
+> svytotal(~api.stu, dstratg)
+ total SE
+api.stu 3190038 29566
+>
+> ## postStratify vs calibrate in stratified sample (Ben French)
+>
+> set.seed(17)
+> dat<-data.frame(y=rep(0:1,each=100),x=rnorm(200)+2*rep(0:1,each=100),
++ z=rbinom(200,1,.2), fpc=rep(c(100,10000),each=100))
+> dat$w<-ifelse(dat$y,dat$z,1-dat$z)
+> popw<-data.frame(w=c("0","1"), Freq=c(2000,8000))
+> des<-svydesign(id=~1,fpc=~fpc, data=dat,strata=~y)
+> postStratify(des,~w,popw)->dps
+> dcal<-calibrate(des,~factor(w), pop=c(10000,8000))
+>
+> all.equal(SE(svymean(~x,dcal)),SE(svymean(~x,dps)))
+[1] TRUE
+>
+> ## missing data in calibrated design
+> dps$variables$z[1]<-NA
+> summary(svyglm(y~z+x,design=dps,family=quasibinomial))
+
+Call:
+svyglm(formula = y ~ z + x, design = dps, family = quasibinomial)
+
+Survey design:
+postStratify(des, ~w, popw)
+
+Coefficients:
+ Estimate Std. Error t value Pr(>|t|)
+(Intercept) -0.1203 0.3380 -0.356 0.722
+z 6.2118 0.6451 9.630 <2e-16 ***
+x 2.2602 0.2514 8.992 <2e-16 ***
+---
+Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+(Dispersion parameter for quasibinomial family taken to be 1.919987)
+
+Number of Fisher Scoring iterations: 9
+
+>
+> proc.time()
+ user system elapsed
+ 1.287 0.034 1.330
diff --git a/tests/check.R b/tests/check.R
new file mode 100755
index 0000000..fffc5fa
--- /dev/null
+++ b/tests/check.R
@@ -0,0 +1,47 @@
+library(survey)
+data(fpc)
+## test various possibilities for svydesign
+a<-svydesign(weights=~weight, ids=~psuid, strata=~stratid, variables=~x, data=fpc, nest=TRUE)
+a
+svymean(~x,a)
+a<-svydesign(weights=~weight, ids=~0, strata=~stratid, variables=~x, data=fpc, nest=TRUE)
+a
+svymean(~x,a)
+a<-svydesign(weights=1, ids=~0, strata=~stratid, variables=~x, data=fpc, nest=TRUE)
+a
+svymean(~x,a)
+a<-svydesign(ids=~0, strata=~stratid, variables=~x, data=fpc, nest=TRUE)
+a
+svymean(~x,a)
+a<-svydesign(ids=~0, strata=~stratid, prob=~I(1/weight),variables=~x, data=fpc, nest=TRUE)
+a
+svymean(~x,a)
+a<-svydesign(ids=~psuid, strata=~stratid, variables=~x, data=fpc, nest=TRUE)
+a
+svymean(~x,a)
+a<-svydesign(ids=~psuid, variables=~x, data=fpc, nest=TRUE)
+a
+svymean(~x,a)
+a<-svydesign(ids=~psuid, weights=~weight, variables=~x, data=fpc, nest=TRUE)
+a
+svymean(~x,a)
+a<-svydesign(ids=~stratid+psuid, weights=~weight, variables=~x, data=fpc)
+a
+svymean(~x,a)
+a<-svydesign(ids=~stratid+psuid, variables=~x, data=fpc)
+a
+svymean(~x,a)
+a<-svydesign(weights=fpc$weight, ids=fpc$psuid, strata=fpc$stratid, variables=fpc[,"x",drop=FALSE], nest=TRUE)
+a
+svymean(~x,a)
+a<-svydesign(weights=fpc$weight, ids=fpc$psuid, strata=fpc$stratid, variables=fpc[,4:6], nest=TRUE)
+a
+svymean(~x,a)
+
+a<-svydesign(weights=fpc$weight, ids=fpc$psuid, variables=fpc[,4:6], fpc=rep(27,8))
+a
+svymean(~x,a)
+
+a<-svydesign(weights=fpc$weight, ids=fpc$psuid, strata=fpc$stratid, nest=TRUE, variables=fpc[,4:6], fpc=fpc$Nh)
+a
+svymean(~x,a)
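
All of these permutations should describe the same design whenever they imply the same sampling probabilities; weights=~w, prob=~I(1/w), and bare vector arguments are interchangeable. A minimal sketch of one such pair:

    library(survey)
    data(fpc)
    d1 <- svydesign(ids=~psuid, strata=~stratid, weights=~weight,
                    variables=~x, data=fpc, nest=TRUE)
    d2 <- svydesign(ids=~psuid, strata=~stratid, prob=~I(1/weight),
                    variables=~x, data=fpc, nest=TRUE)
    ## same probabilities, so estimates and SEs must agree
    all.equal(SE(svymean(~x, d1)), SE(svymean(~x, d2)))
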
diff --git a/tests/check.Rout.save b/tests/check.Rout.save
new file mode 100755
index 0000000..fad4409
--- /dev/null
+++ b/tests/check.Rout.save
@@ -0,0 +1,160 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> data(fpc)
+> ## test various possibilities for svydesign
+> a<-svydesign(weights=~weight, ids=~psuid, strata=~stratid, variables=~x, data=fpc, nest=TRUE)
+> a
+Stratified Independent Sampling design (with replacement)
+svydesign(weights = ~weight, ids = ~psuid, strata = ~stratid,
+ variables = ~x, data = fpc, nest = TRUE)
+> svymean(~x,a)
+ mean SE
+x 5.4481 0.7413
+> a<-svydesign(weights=~weight, ids=~0, strata=~stratid, variables=~x, data=fpc, nest=TRUE)
+> a
+Stratified Independent Sampling design (with replacement)
+svydesign(weights = ~weight, ids = ~0, strata = ~stratid, variables = ~x,
+ data = fpc, nest = TRUE)
+> svymean(~x,a)
+ mean SE
+x 5.4481 0.7413
+> a<-svydesign(weights=1, ids=~0, strata=~stratid, variables=~x, data=fpc, nest=TRUE)
+> a
+Stratified Independent Sampling design (with replacement)
+svydesign(weights = 1, ids = ~0, strata = ~stratid, variables = ~x,
+ data = fpc, nest = TRUE)
+> svymean(~x,a)
+ mean SE
+x 44.2 6.237
+> a<-svydesign(ids=~0, strata=~stratid, variables=~x, data=fpc, nest=TRUE)
+Warning message:
+In svydesign.default(ids = ~0, strata = ~stratid, variables = ~x, :
+ No weights or probabilities supplied, assuming equal probability
+> a
+Stratified Independent Sampling design (with replacement)
+svydesign(ids = ~0, strata = ~stratid, variables = ~x, data = fpc,
+ nest = TRUE)
+> svymean(~x,a)
+ mean SE
+x 5.525 0.7796
+> a<-svydesign(ids=~0, strata=~stratid, prob=~I(1/weight),variables=~x, data=fpc, nest=TRUE)
+> a
+Stratified Independent Sampling design (with replacement)
+svydesign(ids = ~0, strata = ~stratid, prob = ~I(1/weight), variables = ~x,
+ data = fpc, nest = TRUE)
+> svymean(~x,a)
+ mean SE
+x 5.4481 0.7413
+> a<-svydesign(ids=~psuid, strata=~stratid, variables=~x, data=fpc, nest=TRUE)
+Warning message:
+In svydesign.default(ids = ~psuid, strata = ~stratid, variables = ~x, :
+ No weights or probabilities supplied, assuming equal probability
+> a
+Stratified Independent Sampling design (with replacement)
+svydesign(ids = ~psuid, strata = ~stratid, variables = ~x, data = fpc,
+ nest = TRUE)
+> svymean(~x,a)
+ mean SE
+x 5.525 0.7796
+> a<-svydesign(ids=~psuid, variables=~x, data=fpc, nest=TRUE)
+Warning message:
+In svydesign.default(ids = ~psuid, variables = ~x, data = fpc, nest = TRUE) :
+ No weights or probabilities supplied, assuming equal probability
+> a
+1 - level Cluster Sampling design (with replacement)
+With (5) clusters.
+svydesign(ids = ~psuid, variables = ~x, data = fpc, nest = TRUE)
+> svymean(~x,a)
+ mean SE
+x 5.525 0.838
+> a<-svydesign(ids=~psuid, weights=~weight, variables=~x, data=fpc, nest=TRUE)
+> a
+1 - level Cluster Sampling design (with replacement)
+With (5) clusters.
+svydesign(ids = ~psuid, weights = ~weight, variables = ~x, data = fpc,
+ nest = TRUE)
+> svymean(~x,a)
+ mean SE
+x 5.4481 0.7938
+> a<-svydesign(ids=~stratid+psuid, weights=~weight, variables=~x, data=fpc)
+> a
+2 - level Cluster Sampling design (with replacement)
+With (2, 8) clusters.
+svydesign(ids = ~stratid + psuid, weights = ~weight, variables = ~x,
+ data = fpc)
+> svymean(~x,a)
+ mean SE
+x 5.4481 0.5465
+> a<-svydesign(ids=~stratid+psuid, variables=~x, data=fpc)
+Warning message:
+In svydesign.default(ids = ~stratid + psuid, variables = ~x, data = fpc) :
+ No weights or probabilities supplied, assuming equal probability
+> a
+2 - level Cluster Sampling design (with replacement)
+With (2, 8) clusters.
+svydesign(ids = ~stratid + psuid, variables = ~x, data = fpc)
+> svymean(~x,a)
+ mean SE
+x 5.525 0.5188
+> a<-svydesign(weights=fpc$weight, ids=fpc$psuid, strata=fpc$stratid, variables=fpc[,"x",drop=FALSE], nest=TRUE)
+> a
+Stratified Independent Sampling design (with replacement)
+svydesign(weights = fpc$weight, ids = fpc$psuid, strata = fpc$stratid,
+ variables = fpc[, "x", drop = FALSE], nest = TRUE)
+> svymean(~x,a)
+ mean SE
+x 5.4481 0.7413
+> a<-svydesign(weights=fpc$weight, ids=fpc$psuid, strata=fpc$stratid, variables=fpc[,4:6], nest=TRUE)
+> a
+Stratified Independent Sampling design (with replacement)
+svydesign(weights = fpc$weight, ids = fpc$psuid, strata = fpc$stratid,
+ variables = fpc[, 4:6], nest = TRUE)
+> svymean(~x,a)
+ mean SE
+x 5.4481 0.7413
+>
+> a<-svydesign(weights=fpc$weight, ids=fpc$psuid, variables=fpc[,4:6], fpc=rep(27,8))
+> a
+1 - level Cluster Sampling design
+With (5) clusters.
+svydesign(weights = fpc$weight, ids = fpc$psuid, variables = fpc[,
+ 4:6], fpc = rep(27, 8))
+> svymean(~x,a)
+ mean SE
+x 5.4481 0.7165
+>
+> a<-svydesign(weights=fpc$weight, ids=fpc$psuid, strata=fpc$stratid, nest=TRUE, variables=fpc[,4:6], fpc=fpc$Nh)
+> a
+Stratified Independent Sampling design
+svydesign(weights = fpc$weight, ids = fpc$psuid, strata = fpc$stratid,
+ nest = TRUE, variables = fpc[, 4:6], fpc = fpc$Nh)
+> svymean(~x,a)
+ mean SE
+x 5.4481 0.616
+>
+> proc.time()
+ user system elapsed
+ 0.201 0.024 0.232
diff --git a/tests/deff.R b/tests/deff.R
new file mode 100644
index 0000000..3b8fb5d
--- /dev/null
+++ b/tests/deff.R
@@ -0,0 +1,29 @@
+## from Takahiro Tsuchiya
+library(survey)
+kigyo<-read.table(tmp<-textConnection(" obs uriage srs.w pps.w
+1 1 15 100 20
+2 2 143 100 200
+3 3 21 100 11
+4 4 51 100 25
+5 5 337 100 550
+6 6 50 100 30
+7 7 274 100 250
+8 8 145 100 100
+9 9 15 100 10
+10 10 86 100 55
+",open="r"),header=TRUE)
+close(tmp)
+des.srs <- svydesign(ids=~1, weights=~srs.w, data=kigyo)
+(res.srs <- svymean(~uriage, des.srs, deff=TRUE))
+(SE(res.srs)^2) / ((1-10/1000) * coef(svyvar(~uriage, des.srs)) / 10)
+
+(tres.srs <- svytotal(~uriage, des.srs, deff=TRUE))
+(SE(tres.srs)^2) / (1000^2 * (1-10/1000) * coef(svyvar(~uriage, des.srs)) / 10)
+
+
+des.pps <- svydesign(ids=~1, weights=~pps.w, data=kigyo)
+(res.pps <- svymean(~uriage, des.pps, deff='replace'))
+(SE(res.pps)^2) / (coef(svyvar(~uriage, des.pps)) / 10)
+(tres.pps <- svytotal(~uriage, des.pps, deff='replace'))
+(N.hat <- sum(weights(des.pps)))
+(SE(tres.pps)^2) / (N.hat^2 * coef(svyvar(~uriage, des.pps)) / 10)
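
The hand checks above implement the definition of the design effect: the design-based variance divided by the variance that simple random sampling of the same size would give, i.e. DEff = SE^2 / ((1 - n/N) * s^2 / n) for a mean, with the (1 - n/N) term dropped under deff='replace'. A minimal sketch on the api data, with N estimated by the sum of weights:

    library(survey)
    data(api)
    dstrat <- svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
    (m <- svymean(~enroll, dstrat, deff=TRUE))
    n <- nrow(apistrat); N <- sum(weights(dstrat))
    ## should reproduce the DEff column printed by the line above
    SE(m)^2 / ((1 - n/N) * coef(svyvar(~enroll, dstrat)) / n)
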
diff --git a/tests/deff.Rout.save b/tests/deff.Rout.save
new file mode 100644
index 0000000..82e6bc1
--- /dev/null
+++ b/tests/deff.Rout.save
@@ -0,0 +1,74 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> ## from Takahiro Tsuchiya
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> kigyo<-read.table(tmp<-textConnection(" obs uriage srs.w pps.w
++ 1 1 15 100 20
++ 2 2 143 100 200
++ 3 3 21 100 11
++ 4 4 51 100 25
++ 5 5 337 100 550
++ 6 6 50 100 30
++ 7 7 274 100 250
++ 8 8 145 100 100
++ 9 9 15 100 10
++ 10 10 86 100 55
++ ",open="r"),header=TRUE)
+> close(tmp)
+> des.srs <- svydesign(ids=~1, weights=~srs.w, data=kigyo)
+> (res.srs <- svymean(~uriage, des.srs, deff=TRUE))
+ mean SE DEff
+uriage 113.700 35.626 1.0101
+> (SE(res.srs)^2) / ((1-10/1000) * coef(svyvar(~uriage, des.srs)) / 10)
+ uriage
+uriage 1.010101
+>
+> (tres.srs <- svytotal(~uriage, des.srs, deff=TRUE))
+ total SE DEff
+uriage 113700 35626 1.0101
+> (SE(tres.srs)^2) / (1000^2 * (1-10/1000) * coef(svyvar(~uriage, des.srs)) / 10)
+ uriage
+uriage 1.010101
+>
+>
+> des.pps <- svydesign(ids=~1, weights=~pps.w, data=kigyo)
+> (res.pps <- svymean(~uriage, des.pps, deff='replace'))
+ mean SE DEff
+uriage 243.914 48.752 1.9741
+> (SE(res.pps)^2) / (coef(svyvar(~uriage, des.pps)) / 10)
+ uriage
+uriage 1.974067
+> (tres.pps <- svytotal(~uriage, des.pps, deff='replace'))
+ total SE DEff
+uriage 305136 184965 18.157
+> (N.hat <- sum(weights(des.pps)))
+[1] 1251
+> (SE(tres.pps)^2) / (N.hat^2 * coef(svyvar(~uriage, des.pps)) / 10)
+ uriage
+uriage 18.15669
+>
+> proc.time()
+ user system elapsed
+ 0.163 0.021 0.189
diff --git a/tests/domain.R b/tests/domain.R
new file mode 100644
index 0000000..d895e32
--- /dev/null
+++ b/tests/domain.R
@@ -0,0 +1,116 @@
+##
+## Domain means can be written as ratio estimators or as regression coefficients
+##
+## This code checks that subsetting the design object gives the same results as
+## these approaches.
+##
+
+
+library(survey)
+data(fpc)
+dfpc<-svydesign(id=~psuid,strat=~stratid,weight=~weight,data=fpc,nest=TRUE)
+dsub<-subset(dfpc,x>4)
+(m1<-svymean(~x,design=dsub))
+
+## These should give the same domain estimates and standard errors
+(m2<-svyby(~x,~I(x>4),design=dfpc, svymean,keep.var=TRUE))
+m3<-svyglm(x~I(x>4)+0,design=dfpc)
+summary(m3)
+(m4<-svyratio(~I(x*(x>4)),~as.numeric(x>4), dfpc))
+stopifnot(isTRUE(all.equal(SE(m2), as.vector(SE(m3)))))
+stopifnot(isTRUE(all.equal(SE(m2)[2], as.vector(SE(m4)))))
+
+## with strata
+data(api)
+dstrat<-svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+m1<-svymean(~enroll, subset(dstrat, comp.imp=="Yes"))
+m2<-svyglm(enroll~comp.imp-1, dstrat)
+m3<- svyratio(~I(enroll*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dstrat)
+stopifnot(isTRUE(all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
+stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
+
+## with calibration
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
+(dclus1g3 <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069)))
+
+m1<-svymean(~api00, subset(dclus1g3, comp.imp=="Yes"))
+m3<-svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dclus1g3)
+m2<-svyglm(api00~comp.imp-1, dclus1g3)
+stopifnot(isTRUE( all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
+stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
+
+## with raking
+pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
+pop.schwide <- data.frame(sch.wide=c("No","Yes"), Freq=c(1072,5122))
+dclus1r<-rake(dclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))
+m1<-svymean(~api00, subset(dclus1r, comp.imp=="Yes"))
+m2<-svyglm(api00~comp.imp-1, dclus1r)
+m3<-svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dclus1r)
+stopifnot(isTRUE( all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
+stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
+
+
+
+##
+## based on bug report from Takahiro Tsuchiya for version 3.4
+##
+rei<-read.table(tmp<-textConnection(
+" id N n.a h n.ah n.h sub y
+1 1 300 20 1 12 5 TRUE 1
+2 2 300 20 1 12 5 TRUE 2
+3 3 300 20 1 12 5 TRUE 3
+4 4 300 20 1 12 5 TRUE 4
+5 5 300 20 1 12 5 TRUE 5
+6 6 300 20 1 12 5 FALSE NA
+7 7 300 20 1 12 5 FALSE NA
+8 8 300 20 1 12 5 FALSE NA
+9 9 300 20 1 12 5 FALSE NA
+10 10 300 20 1 12 5 FALSE NA
+11 11 300 20 1 12 5 FALSE NA
+12 12 300 20 1 12 5 FALSE NA
+13 13 300 20 2 8 3 TRUE 6
+14 14 300 20 2 8 3 TRUE 7
+15 15 300 20 2 8 3 TRUE 8
+16 16 300 20 2 8 3 FALSE NA
+17 17 300 20 2 8 3 FALSE NA
+18 18 300 20 2 8 3 FALSE NA
+19 19 300 20 2 8 3 FALSE NA
+20 20 300 20 2 8 3 FALSE NA
+"), header=TRUE)
+close(tmp)
+
+
+des.rei2 <- twophase(id=list(~id,~id), strata=list(NULL,~h),
+ fpc=list(~N,NULL), subset=~sub, data=rei, method="full")
+tot2<- svytotal(~y, subset(des.rei2, y>3))
+
+rei$y<-rei$y*(rei$y>3)
+## based on Sarndal et al (9.4.14)
+rei$w.ah <- rei$n.ah / rei$n.a
+a.rei <- aggregate(rei, by=list(rei$h), mean, na.rm=TRUE)
+a.rei$S.ysh <- tapply(rei$y, rei$h, var, na.rm=TRUE)
+a.rei$y.u <- sum(a.rei$w.ah * a.rei$y)
+V <- with(a.rei, sum(N * (N-1) * ((n.ah-1)/(n.a-1) - (n.h-1)/(N-1)) * w.ah * S.ysh / n.h))
+V <- V + with(a.rei, sum(N * (N-n.a) * w.ah * (y - y.u)^2 / (n.a-1)))
+
+a.rei$f.h<-with(a.rei, n.h/n.ah)
+Vphase2<-with(a.rei, sum(N*N*w.ah^2* ((1-f.h)/n.h) *S.ysh))
+
+a.rei$f<-with(a.rei, n.a/N)
+a.rei$delta.h<-with(a.rei, (1/n.h)*(n.a-n.ah)/(n.a-1))
+Vphase1<-with(a.rei, sum(N*N*((1-f)/n.a)*( w.ah*(1-delta.h)*S.ysh+ ((n.a)/(n.a-1))*w.ah*(y-y.u)^2)))
+
+V
+Vphase1
+Vphase2
+vcov(tot2)
+
+## comparing to regression
+reg<-svyglm(y~I(y<4), design=des.rei2)
+mn<-svymean(~y, subset(des.rei2,y>3))
+all.equal(as.vector(coef(reg))[1],as.vector(coef(mn)))
+all.equal(as.vector(SE(reg))[1],as.vector(SE(mn)))
+vcov(mn)
+vcov(reg)
+
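The common thread in this test is that a domain mean is a ratio of two totals, sum(w*y*I(domain)) / sum(w*I(domain)), so subsetting the design, svyby, regression on a domain indicator, and svyratio must all agree. A compact sketch of the same identity on the api data:

    library(survey)
    data(api)
    dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
    ## three routes to the same domain mean and SE
    svymean(~api00, subset(dclus1, stype=="E"))
    svyby(~api00, ~stype, dclus1, svymean)
    svyratio(~I(api00*(stype=="E")), ~as.numeric(stype=="E"), dclus1)
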
diff --git a/tests/domain.Rout.save b/tests/domain.Rout.save
new file mode 100644
index 0000000..4b8d473
--- /dev/null
+++ b/tests/domain.Rout.save
@@ -0,0 +1,220 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> ##
+> ## Domain means can be written as ratio estimators or as regression coefficients
+> ##
+> ## This code checks that subsetting the design object gives the same results as
+> ## these approaches.
+> ##
+>
+>
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> data(fpc)
+> dfpc<-svydesign(id=~psuid,strat=~stratid,weight=~weight,data=fpc,nest=TRUE)
+> dsub<-subset(dfpc,x>4)
+> (m1<-svymean(~x,design=dsub))
+ mean SE
+x 6.195 0.7555
+>
+> ## These should give the same domain estimates and standard errors
+> (m2<-svyby(~x,~I(x>4),design=dfpc, svymean,keep.var=TRUE))
+ I(x > 4) x se
+FALSE FALSE 3.314286 0.3117042
+TRUE TRUE 6.195000 0.7555129
+> m3<-svyglm(x~I(x>4)+0,design=dfpc)
+> summary(m3)
+
+Call:
+svyglm(formula = x ~ I(x > 4) + 0, design = dfpc)
+
+Survey design:
+svydesign(id = ~psuid, strat = ~stratid, weight = ~weight, data = fpc,
+ nest = TRUE)
+
+Coefficients:
+ Estimate Std. Error t value Pr(>|t|)
+I(x > 4)FALSE 3.3143 0.3117 10.63 0.000127 ***
+I(x > 4)TRUE 6.1950 0.7555 8.20 0.000439 ***
+---
+Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+(Dispersion parameter for gaussian family taken to be 2.557379)
+
+Number of Fisher Scoring iterations: 2
+
+> (m4<-svyratio(~I(x*(x>4)),~as.numeric(x>4), dfpc))
+Ratio estimator: svyratio.survey.design2(~I(x * (x > 4)), ~as.numeric(x > 4),
+ dfpc)
+Ratios=
+ as.numeric(x > 4)
+I(x * (x > 4)) 6.195
+SEs=
+ as.numeric(x > 4)
+I(x * (x > 4)) 0.7555129
+> stopifnot(isTRUE(all.equal(SE(m2), as.vector(SE(m3)))))
+> stopifnot(isTRUE(all.equal(SE(m2)[2], as.vector(SE(m4)))))
+>
+> ## with strata
+> data(api)
+> dstrat<-svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+> m1<-svymean(~enroll, subset(dstrat, comp.imp=="Yes"))
+> m2<-svyglm(enroll~comp.imp-1, dstrat)
+> m3<- svyratio(~I(enroll*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dstrat)
+> stopifnot(isTRUE(all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
+> stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
+>
+> ## with calibration
+> dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
+> (dclus1g3 <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069)))
+1 - level Cluster Sampling design
+With (15) clusters.
+calibrate(dclus1, ~stype + api99, c(pop.totals, api99 = 3914069))
+>
+> m1<-svymean(~api00, subset(dclus1g3, comp.imp=="Yes"))
+> m3<-svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dclus1g3)
+> m2<-svyglm(api00~comp.imp-1, dclus1g3)
+> stopifnot(isTRUE( all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
+> stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
+>
+> ## with raking
+> pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
+> pop.schwide <- data.frame(sch.wide=c("No","Yes"), Freq=c(1072,5122))
+> dclus1r<-rake(dclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))
+> m1<-svymean(~api00, subset(dclus1r, comp.imp=="Yes"))
+> m2<-svyglm(api00~comp.imp-1, dclus1r)
+> m3<-svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dclus1r)
+> stopifnot(isTRUE( all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
+> stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
+>
+>
+>
+> ##
+> ## based on bug report from Takahiro Tsuchiya for version 3.4
+> ##
+> rei<-read.table(tmp<-textConnection(
++ " id N n.a h n.ah n.h sub y
++ 1 1 300 20 1 12 5 TRUE 1
++ 2 2 300 20 1 12 5 TRUE 2
++ 3 3 300 20 1 12 5 TRUE 3
++ 4 4 300 20 1 12 5 TRUE 4
++ 5 5 300 20 1 12 5 TRUE 5
++ 6 6 300 20 1 12 5 FALSE NA
++ 7 7 300 20 1 12 5 FALSE NA
++ 8 8 300 20 1 12 5 FALSE NA
++ 9 9 300 20 1 12 5 FALSE NA
++ 10 10 300 20 1 12 5 FALSE NA
++ 11 11 300 20 1 12 5 FALSE NA
++ 12 12 300 20 1 12 5 FALSE NA
++ 13 13 300 20 2 8 3 TRUE 6
++ 14 14 300 20 2 8 3 TRUE 7
++ 15 15 300 20 2 8 3 TRUE 8
++ 16 16 300 20 2 8 3 FALSE NA
++ 17 17 300 20 2 8 3 FALSE NA
++ 18 18 300 20 2 8 3 FALSE NA
++ 19 19 300 20 2 8 3 FALSE NA
++ 20 20 300 20 2 8 3 FALSE NA
++ "), header=TRUE)
+> close(tmp)
+>
+>
+> des.rei2 <- twophase(id=list(~id,~id), strata=list(NULL,~h),
++ fpc=list(~N,NULL), subset=~sub, data=rei, method="full")
+> tot2<- svytotal(~y, subset(des.rei2, y>3))
+>
+> rei$y<-rei$y*(rei$y>3)
+> ## based on Sarndal et al (9.4.14)
+> rei$w.ah <- rei$n.ah / rei$n.a
+> a.rei <- aggregate(rei, by=list(rei$h), mean, na.rm=TRUE)
+> a.rei$S.ysh <- tapply(rei$y, rei$h, var, na.rm=TRUE)
+> a.rei$y.u <- sum(a.rei$w.ah * a.rei$y)
+> V <- with(a.rei, sum(N * (N-1) * ((n.ah-1)/(n.a-1) - (n.h-1)/(N-1)) * w.ah * S.ysh / n.h))
+> V <- V + with(a.rei, sum(N * (N-n.a) * w.ah * (y - y.u)^2 / (n.a-1)))
+>
+> a.rei$f.h<-with(a.rei, n.h/n.ah)
+> Vphase2<-with(a.rei, sum(N*N*w.ah^2* ((1-f.h)/n.h) *S.ysh))
+>
+> a.rei$f<-with(a.rei, n.a/N)
+> a.rei$delta.h<-with(a.rei, (1/n.h)*(n.a-n.ah)/(n.a-1))
+> Vphase1<-with(a.rei, sum(N*N*((1-f)/n.a)*( w.ah*(1-delta.h)*S.ysh+ ((n.a)/(n.a-1))*w.ah*(y-y.u)^2)))
+>
+> V
+[1] 70761.47
+> Vphase1
+[1] 44325.47
+> Vphase2
+[1] 26436
+> vcov(tot2)
+ [,1]
+[1,] 70761.47
+attr(,"phases")
+attr(,"phases")$phase1
+ [,1]
+[1,] 44325.47
+
+attr(,"phases")$phase2
+ [,1]
+[1,] 26436
+
+>
+> ## comparing to regression
+> reg<-svyglm(y~I(y<4), design=des.rei2)
+> mn<-svymean(~y, subset(des.rei2,y>3))
+> all.equal(as.vector(coef(reg))[1],as.vector(coef(mn)))
+[1] TRUE
+> all.equal(as.vector(SE(reg))[1],as.vector(SE(mn)))
+[1] TRUE
+> vcov(mn)
+ [,1]
+[1,] 0.3292258
+attr(,"phases")
+attr(,"phases")$phase1
+ [,1]
+[1,] 0.1599264
+
+attr(,"phases")$phase2
+ [,1]
+[1,] 0.1692994
+
+> vcov(reg)
+ (Intercept) I(y < 4)TRUE
+(Intercept) 0.3292258 -0.3292258
+I(y < 4)TRUE -0.3292258 0.5901907
+attr(,"phases")
+attr(,"phases")$phase1
+ (Intercept) I(y < 4)TRUE
+(Intercept) 0.1599264 -0.1599264
+I(y < 4)TRUE -0.1599264 0.2588542
+
+attr(,"phases")$phase2
+ (Intercept) I(y < 4)TRUE
+(Intercept) 0.1692994 -0.1692994
+I(y < 4)TRUE -0.1692994 0.3313365
+
+>
+>
+> proc.time()
+ user system elapsed
+ 1.707 0.055 1.778
diff --git a/tests/fpc.R b/tests/fpc.R
new file mode 100644
index 0000000..7964e8a
--- /dev/null
+++ b/tests/fpc.R
@@ -0,0 +1,4 @@
+library(survey)
+## check many permutations of fpc specification
+example(fpc)
+
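example(fpc) runs the permutations from the help page; the substance is that fpc can be supplied either as the stratum population size Nh or as the sampling fraction nh/Nh, with identical results. A minimal sketch of that equivalence:

    library(survey)
    data(fpc)
    a <- svydesign(weights=~weight, ids=~psuid, strata=~stratid,
                   fpc=~Nh, variables=~x, data=fpc, nest=TRUE)
    b <- svydesign(weights=~weight, ids=~psuid, strata=~stratid,
                   fpc=~I(nh/Nh), variables=~x, data=fpc, nest=TRUE)
    all.equal(SE(svymean(~x, a)), SE(svymean(~x, b)))
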
diff --git a/tests/fpc.Rout.save b/tests/fpc.Rout.save
new file mode 100644
index 0000000..9902123
--- /dev/null
+++ b/tests/fpc.Rout.save
@@ -0,0 +1,105 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> ## check many permutations of fpc specification
+> example(fpc)
+
+fpc> data(fpc)
+
+fpc> fpc
+ stratid psuid weight nh Nh x
+1 1 1 3 5 15 2.8
+2 1 2 3 5 15 4.1
+3 1 3 3 5 15 6.8
+4 1 4 3 5 15 6.8
+5 1 5 3 5 15 9.2
+6 2 1 4 3 12 3.7
+7 2 2 4 3 12 6.6
+8 2 3 4 3 12 4.2
+
+fpc> withoutfpc<-svydesign(weights=~weight, ids=~psuid, strata=~stratid, variables=~x,
+fpc+ data=fpc, nest=TRUE)
+
+fpc> withoutfpc
+Stratified Independent Sampling design (with replacement)
+svydesign(weights = ~weight, ids = ~psuid, strata = ~stratid,
+ variables = ~x, data = fpc, nest = TRUE)
+
+fpc> svymean(~x, withoutfpc)
+ mean SE
+x 5.4481 0.7413
+
+fpc> withfpc<-svydesign(weights=~weight, ids=~psuid, strata=~stratid,
+fpc+ fpc=~Nh, variables=~x, data=fpc, nest=TRUE)
+
+fpc> withfpc
+Stratified Independent Sampling design
+svydesign(weights = ~weight, ids = ~psuid, strata = ~stratid,
+ fpc = ~Nh, variables = ~x, data = fpc, nest = TRUE)
+
+fpc> svymean(~x, withfpc)
+ mean SE
+x 5.4481 0.616
+
+fpc> ## Other equivalent forms
+fpc> withfpc<-svydesign(prob=~I(1/weight), ids=~psuid, strata=~stratid,
+fpc+ fpc=~Nh, variables=~x, data=fpc, nest=TRUE)
+
+fpc> svymean(~x, withfpc)
+ mean SE
+x 5.4481 0.616
+
+fpc> withfpc<-svydesign(weights=~weight, ids=~psuid, strata=~stratid,
+fpc+ fpc=~I(nh/Nh), variables=~x, data=fpc, nest=TRUE)
+
+fpc> svymean(~x, withfpc)
+ mean SE
+x 5.4481 0.616
+
+fpc> withfpc<-svydesign(weights=~weight, ids=~interaction(stratid,psuid),
+fpc+ strata=~stratid, fpc=~I(nh/Nh), variables=~x, data=fpc)
+
+fpc> svymean(~x, withfpc)
+ mean SE
+x 5.4481 0.616
+
+fpc> withfpc<-svydesign(ids=~psuid, strata=~stratid, fpc=~Nh,
+fpc+ variables=~x,data=fpc,nest=TRUE)
+
+fpc> svymean(~x, withfpc)
+ mean SE
+x 5.4481 0.616
+
+fpc> withfpc<-svydesign(ids=~psuid, strata=~stratid,
+fpc+ fpc=~I(nh/Nh), variables=~x, data=fpc, nest=TRUE)
+
+fpc> svymean(~x, withfpc)
+ mean SE
+x 5.4481 0.616
+>
+>
+> proc.time()
+ user system elapsed
+ 0.194 0.025 0.229
diff --git a/tests/kalton.R b/tests/kalton.R
new file mode 100644
index 0000000..6aeccdf
--- /dev/null
+++ b/tests/kalton.R
@@ -0,0 +1,48 @@
+library(survey)
+
+ab<-expand.grid(a=factor(1:4),b=factor(1:3))
+
+kaltonsample<-ab[rep(1:12,c(20,50,100,30,40,140,50,100,40,310,50,70)),]
+
+kaltonpop<-ab[rep(1:12,c(80,60,170,55,40,150,60,165,55,340,200,125)),]
+
+jointpop<-colSums(model.matrix(~a*b,kaltonpop))
+marginalpop<-colSums(model.matrix(~a+b,kaltonpop))
+gregpop<-colSums(model.matrix(~as.numeric(a)+as.numeric(b),kaltonpop))
+
+dkalton<-svydesign(id=~1,data=kaltonsample)
+
+dps<-postStratify(dkalton,~a+b,xtabs(~a+b,kaltonpop))
+
+drake<-rake(dkalton, list(~a,~b),list(xtabs(~a,kaltonpop),xtabs(~b,kaltonpop)),control=list(epsilon=0.0001))
+
+dcalps<-calibrate(dkalton, ~a*b, jointpop)
+dcalrake<-calibrate(dkalton,~a+b, marginalpop, calfun="raking")
+dlinear<-calibrate(dkalton, ~a+b, marginalpop)
+
+dtrunclinear<-calibrate(dkalton, ~a+b, marginalpop,bounds=c(0.5,2.2))
+
+dlogit<-calibrate(dkalton, ~a+b, marginalpop,bounds=c(0.5,2.2),calfun="logit")
+
+dgreg<-calibrate(dkalton,~as.numeric(a)+as.numeric(b), gregpop)
+
+
+#table A
+ round(svytable(~a+b,dps)/xtabs(~a+b,kaltonsample),2)
+ round(svytable(~a+b,dcalps)/xtabs(~a+b,kaltonsample),2)
+
+#table B
+ round(svytable(~a+b,drake)/xtabs(~a+b,kaltonsample),2)
+ round(svytable(~a+b,dcalrake)/xtabs(~a+b,kaltonsample),2)
+
+#table C
+round(svytable(~a+b,dlinear)/xtabs(~a+b,kaltonsample),2)
+
+#table D
+round(svytable(~a+b,dgreg)/xtabs(~a+b,kaltonsample),2)
+
+#table G
+round(svytable(~a+b,dlogit)/xtabs(~a+b,kaltonsample),2)
+
+#table G
+round(svytable(~a+b,dtrunclinear)/xtabs(~a+b,kaltonsample),2)
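
The pairings being tested come in two families: post-stratification is calibration on the fully saturated interaction ~a*b, and raking on the margins is calibration with calfun="raking" on ~a+b. A minimal sketch of the first equivalence, checked at the level of the weights themselves:

    library(survey)
    ab <- expand.grid(a=factor(1:4), b=factor(1:3))
    samp <- ab[rep(1:12, c(20,50,100,30,40,140,50,100,40,310,50,70)),]
    pop  <- ab[rep(1:12, c(80,60,170,55,40,150,60,165,55,340,200,125)),]
    des <- svydesign(id=~1, data=samp)  # equal-probability warning expected
    dps    <- postStratify(des, ~a+b, xtabs(~a+b, pop))
    dcalps <- calibrate(des, ~a*b, colSums(model.matrix(~a*b, pop)))
    ## same cell totals, same within-cell weights
    all.equal(as.numeric(weights(dps)), as.numeric(weights(dcalps)))
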
diff --git a/tests/kalton.Rout.save b/tests/kalton.Rout.save
new file mode 100644
index 0000000..70e52a9
--- /dev/null
+++ b/tests/kalton.Rout.save
@@ -0,0 +1,128 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+>
+> ab<-expand.grid(a=factor(1:4),b=factor(1:3))
+>
+> kaltonsample<-ab[rep(1:12,c(20,50,100,30,40,140,50,100,40,310,50,70)),]
+>
+> kaltonpop<-ab[rep(1:12,c(80,60,170,55,40,150,60,165,55,340,200,125)),]
+>
+> jointpop<-colSums(model.matrix(~a*b,kaltonpop))
+> marginalpop<-colSums(model.matrix(~a+b,kaltonpop))
+> gregpop<-colSums(model.matrix(~as.numeric(a)+as.numeric(b),kaltonpop))
+>
+> dkalton<-svydesign(id=~1,data=kaltonsample)
+Warning message:
+In svydesign.default(id = ~1, data = kaltonsample) :
+ No weights or probabilities supplied, assuming equal probability
+>
+> dps<-postStratify(dkalton,~a+b,xtabs(~a+b,kaltonpop))
+>
+> drake<-rake(dkalton, list(~a,~b),list(xtabs(~a,kaltonpop),xtabs(~b,kaltonpop)),control=list(epsilon=0.0001))
+>
+> dcalps<-calibrate(dkalton, ~a*b, jointpop)
+> dcalrake<-calibrate(dkalton,~a+b, marginalpop, calfun="raking")
+Loading required package: MASS
+> dlinear<-calibrate(dkalton, ~a+b, marginalpop)
+>
+> dtrunclinear<-calibrate(dkalton, ~a+b, marginalpop,bounds=c(0.5,2.2))
+>
+> dlogit<-calibrate(dkalton, ~a+b, marginalpop,bounds=c(0.5,2.2),calfun="logit")
+>
+> dgreg<-calibrate(dkalton,~as.numeric(a)+as.numeric(b), gregpop)
+>
+>
+> #table A
+> round(svytable(~a+b,dps)/xtabs(~a+b,kaltonsample),2)
+ b
+a 1 2 3
+ 1 4.00 1.00 1.38
+ 2 1.20 1.07 1.10
+ 3 1.70 1.20 4.00
+ 4 1.83 1.65 1.79
+> round(svytable(~a+b,dcalps)/xtabs(~a+b,kaltonsample),2)
+ b
+a 1 2 3
+ 1 4.00 1.00 1.37
+ 2 1.20 1.07 1.10
+ 3 1.70 1.20 4.00
+ 4 1.83 1.65 1.79
+>
+> #table B
+> round(svytable(~a+b,drake)/xtabs(~a+b,kaltonsample),2)
+ b
+a 1 2 3
+ 1 1.81 1.45 2.02
+ 2 1.08 0.87 1.21
+ 3 2.20 1.76 2.45
+ 4 1.83 1.47 2.04
+> round(svytable(~a+b,dcalrake)/xtabs(~a+b,kaltonsample),2)
+ b
+a 1 2 3
+ 1 1.81 1.45 2.02
+ 2 1.08 0.87 1.21
+ 3 2.20 1.76 2.45
+ 4 1.83 1.47 2.04
+>
+> #table C
+> round(svytable(~a+b,dlinear)/xtabs(~a+b,kaltonsample),2)
+ b
+a 1 2 3
+ 1 1.82 1.50 1.97
+ 2 1.09 0.78 1.24
+ 3 2.19 1.88 2.34
+ 4 1.83 1.52 1.98
+>
+> #table D
+> round(svytable(~a+b,dgreg)/xtabs(~a+b,kaltonsample),2)
+ b
+a 1 2 3
+ 1 1.21 1.17 1.14
+ 2 1.43 1.40 1.36
+ 3 1.66 1.62 1.59
+ 4 1.88 1.85 1.81
+>
+> #table G
+> round(svytable(~a+b,dlogit)/xtabs(~a+b,kaltonsample),2)
+ b
+a 1 2 3
+ 1 1.87 1.46 1.98
+ 2 1.08 0.74 1.27
+ 3 2.17 2.09 2.18
+ 4 1.89 1.49 1.99
+>
+> #table G
+> round(svytable(~a+b,dtrunclinear)/xtabs(~a+b,kaltonsample),2)
+ b
+a 1 2 3
+ 1 1.81 1.48 1.99
+ 2 1.08 0.75 1.26
+ 3 2.20 2.00 2.20
+ 4 1.83 1.50 2.00
+>
+> proc.time()
+ user system elapsed
+ 0.248 0.027 0.282
diff --git a/tests/lonely.psu.R b/tests/lonely.psu.R
new file mode 100755
index 0000000..67498ca
--- /dev/null
+++ b/tests/lonely.psu.R
@@ -0,0 +1,81 @@
+
+## lonely PSUs by design
+library(survey)
+data(api)
+## lonely PSUs that are not certainty PSUs (no fpc supplied)
+ds<-svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = apiclus1)
+summary(ds)
+
+options(survey.lonely.psu="fail")
+try(svymean(~api00,ds))
+try(svymean(~api00, as.svrepdesign(ds)))
+options(survey.lonely.psu="remove")
+svymean(~api00,ds)
+svymean(~api00, as.svrepdesign(ds))
+options(survey.lonely.psu="certainty")
+svymean(~api00,ds)
+svymean(~api00, as.svrepdesign(ds))
+options(survey.lonely.psu="adjust")
+svymean(~api00,ds)
+svymean(~api00, as.svrepdesign(ds))
+options(survey.lonely.psu="average")
+svymean(~api00,ds)
+svymean(~api00, as.svrepdesign(ds))
+
+## fpc specified
+fpc<-ifelse(apiclus1$dnum==413, 1,1000)
+ds<-svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = apiclus1,fpc=fpc)
+summary(ds)
+
+options(survey.lonely.psu="fail")
+try(svymean(~api00,ds))
+svymean(~api00, as.svrepdesign(ds))
+options(survey.lonely.psu="remove")
+svymean(~api00,ds)
+svymean(~api00, as.svrepdesign(ds))
+options(survey.lonely.psu="certainty")
+svymean(~api00,ds)
+svymean(~api00, as.svrepdesign(ds))
+options(survey.lonely.psu="adjust")
+svymean(~api00,ds)
+svymean(~api00, as.svrepdesign(ds))
+options(survey.lonely.psu="average")
+svymean(~api00,ds)
+svymean(~api00, as.svrepdesign(ds))
+
+rs<-as.svrepdesign(ds)
+svytotal(~api00,rs)
+SE(svytotal(~api00,subset(rs, dnum==413)))==0
+
+## lonely PSUs after subsetting
+ds<-svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = subset(apiclus1,dnum !=413))
+ds1<-ds[-31,]
+summary(ds1)
+
+options(survey.lonely.psu="fail")
+svymean(~api00,ds1)
+options(survey.lonely.psu="remove")
+svymean(~api00,ds1)
+options(survey.lonely.psu="certainty")
+svymean(~api00,ds1)
+options(survey.lonely.psu="adjust")
+svymean(~api00,ds1)
+options(survey.lonely.psu="average")
+svymean(~api00,ds1)
+
+## with adjustment
+options(survey.adjust.domain.lonely=TRUE)
+ds<-svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = subset(apiclus1,dnum !=413))
+ds1<-ds[-31,]
+summary(ds1)
+
+options(survey.lonely.psu="fail")
+try(svymean(~api00,ds1))
+options(survey.lonely.psu="remove")
+svymean(~api00,ds1)
+options(survey.lonely.psu="certainty")
+svymean(~api00,ds1)
+options(survey.lonely.psu="adjust")
+svymean(~api00,ds1)
+options(survey.lonely.psu="average")
+svymean(~api00,ds1)
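
The options differ only in how a stratum with a single PSU enters the variance: "fail" is an error, "remove" and "certainty" let it contribute nothing, "adjust" centres the lonely PSU at the grand mean, and "average" imputes the mean variance contribution of the other strata. A minimal sketch looping over the non-failing options:

    library(survey)
    data(api)
    ds <- svydesign(id=~1, weights=~pw, strata=~dnum, data=apiclus1)
    old <- options()
    sapply(c("remove","certainty","adjust","average"), function(o) {
        options(survey.lonely.psu=o)
        SE(svymean(~api00, ds))
    })
    options(old)
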
diff --git a/tests/lonely.psu.Rout.save b/tests/lonely.psu.Rout.save
new file mode 100755
index 0000000..4880390
--- /dev/null
+++ b/tests/lonely.psu.Rout.save
@@ -0,0 +1,265 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+>
+> ## lonely PSUs by design
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> data(api)
+> ## lonely PSUs that are not certainty PSUs (no fpc supplied)
+> ds<-svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = apiclus1)
+> summary(ds)
+Stratified Independent Sampling design (with replacement)
+svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = apiclus1)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.02954 0.02954 0.02954 0.02954 0.02954 0.02954
+Stratum Sizes:
+ 61 135 178 197 255 406 413 437 448 510 568 637 716 778 815
+obs 13 34 4 13 16 2 1 4 12 21 9 11 37 2 4
+design.PSU 13 34 4 13 16 2 1 4 12 21 9 11 37 2 4
+actual.PSU 13 34 4 13 16 2 1 4 12 21 9 11 37 2 4
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+>
+> options(survey.lonely.psu="fail")
+> try(svymean(~api00,ds))
+Error in onestrat(x[index, , drop = FALSE], clusters[index], nPSU[index][1], :
+ Stratum (413) has only one PSU at stage 1
+> try(svymean(~api00, as.svrepdesign(ds)))
+Error in jknweights(design$strata[, 1], design$cluster[, 1], fpc = fpc, :
+ Stratum 413 has only one PSU
+> options(survey.lonely.psu="remove")
+> svymean(~api00,ds)
+ mean SE
+api00 644.17 5.8058
+> svymean(~api00, as.svrepdesign(ds))
+ mean SE
+api00 644.17 5.8058
+> options(survey.lonely.psu="certainty")
+> svymean(~api00,ds)
+ mean SE
+api00 644.17 5.8058
+> svymean(~api00, as.svrepdesign(ds))
+ mean SE
+api00 644.17 5.8058
+> options(survey.lonely.psu="adjust")
+> svymean(~api00,ds)
+ mean SE
+api00 644.17 5.8281
+> svymean(~api00, as.svrepdesign(ds))
+ mean SE
+api00 644.17 5.8267
+> options(survey.lonely.psu="average")
+> svymean(~api00,ds)
+ mean SE
+api00 644.17 6.0096
+> svymean(~api00, as.svrepdesign(ds))
+ mean SE
+api00 644.17 5.8217
+>
+> ## fpc specified
+> fpc<-ifelse(apiclus1$dnum==413, 1,1000)
+> ds<-svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = apiclus1,fpc=fpc)
+> summary(ds)
+Stratified Independent Sampling design
+svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = apiclus1,
+ fpc = fpc)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.02954 0.02954 0.02954 0.02954 0.02954 0.02954
+Stratum Sizes:
+ 61 135 178 197 255 406 413 437 448 510 568 637 716 778 815
+obs 13 34 4 13 16 2 1 4 12 21 9 11 37 2 4
+design.PSU 13 34 4 13 16 2 1 4 12 21 9 11 37 2 4
+actual.PSU 13 34 4 13 16 2 1 4 12 21 9 11 37 2 4
+Population stratum sizes (PSUs):
+ 135 178 197 255 406 413 437 448 510 568 61 637 716 778 815
+1000 1000 1000 1000 1000 1 1000 1000 1000 1000 1000 1000 1000 1000 1000
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+>
+> options(survey.lonely.psu="fail")
+> try(svymean(~api00,ds))
+ mean SE
+api00 644.17 5.7344
+> svymean(~api00, as.svrepdesign(ds))
+ mean SE
+api00 644.17 5.7344
+> options(survey.lonely.psu="remove")
+> svymean(~api00,ds)
+ mean SE
+api00 644.17 5.7344
+> svymean(~api00, as.svrepdesign(ds))
+ mean SE
+api00 644.17 5.7344
+> options(survey.lonely.psu="certainty")
+> svymean(~api00,ds)
+ mean SE
+api00 644.17 5.7344
+> svymean(~api00, as.svrepdesign(ds))
+ mean SE
+api00 644.17 5.7344
+> options(survey.lonely.psu="adjust")
+> svymean(~api00,ds)
+ mean SE
+api00 644.17 5.7344
+> svymean(~api00, as.svrepdesign(ds))
+ mean SE
+api00 644.17 5.7344
+> options(survey.lonely.psu="average")
+> svymean(~api00,ds)
+ mean SE
+api00 644.17 5.7344
+> svymean(~api00, as.svrepdesign(ds))
+ mean SE
+api00 644.17 5.7501
+>
+> rs<-as.svrepdesign(ds)
+> svytotal(~api00,rs)
+ total SE
+api00 3989986 35616
+> SE(svytotal(~api00,subset(rs, dnum==413)))==0
+[1] TRUE
+>
+> ## lonely PSUs after subsetting
+> ds<-svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = subset(apiclus1,dnum !=413))
+> ds1<-ds[-31,]
+> summary(ds1)
+Stratified Independent Sampling design (with replacement)
+svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = subset(apiclus1,
+ dnum != 413))
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.02954 0.02954 0.02954 0.02954 0.02954 0.02954
+Stratum Sizes:
+ 61 135 178 197 255 406 437 448 510 568 637 716 778 815
+obs 13 34 4 13 16 1 4 12 21 9 11 37 2 4
+design.PSU 13 34 4 13 16 2 4 12 21 9 11 37 2 4
+actual.PSU 13 34 4 13 16 1 4 12 21 9 11 37 2 4
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+>
+> options(survey.lonely.psu="fail")
+> svymean(~api00,ds1)
+ mean SE
+api00 645.14 5.8909
+> options(survey.lonely.psu="remove")
+> svymean(~api00,ds1)
+ mean SE
+api00 645.14 5.8909
+> options(survey.lonely.psu="certainty")
+> svymean(~api00,ds1)
+ mean SE
+api00 645.14 5.8909
+> options(survey.lonely.psu="adjust")
+> svymean(~api00,ds1)
+ mean SE
+api00 645.14 5.8909
+> options(survey.lonely.psu="average")
+> svymean(~api00,ds1)
+ mean SE
+api00 645.14 5.8909
+>
+> ## with adjustment
+> options(survey.adjust.domain.lonely=TRUE)
+> ds<-svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = subset(apiclus1,dnum !=413))
+> ds1<-ds[-31,]
+> summary(ds1)
+Stratified Independent Sampling design (with replacement)
+svydesign(id = ~1, weights = ~pw, strata = ~dnum, data = subset(apiclus1,
+ dnum != 413))
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+0.02954 0.02954 0.02954 0.02954 0.02954 0.02954
+Stratum Sizes:
+ 61 135 178 197 255 406 437 448 510 568 637 716 778 815
+obs 13 34 4 13 16 1 4 12 21 9 11 37 2 4
+design.PSU 13 34 4 13 16 2 4 12 21 9 11 37 2 4
+actual.PSU 13 34 4 13 16 1 4 12 21 9 11 37 2 4
+Data variables:
+ [1] "cds" "stype" "name" "sname" "snum" "dname"
+ [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
+[13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
+[19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
+[25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
+[31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
+[37] "api.stu" "fpc" "pw"
+>
+> options(survey.lonely.psu="fail")
+> try(svymean(~api00,ds1))
+ mean SE
+api00 645.14 5.8909
+Warning message:
+In onestrat(x[index, , drop = FALSE], clusters[index], nPSU[index][1], :
+ Stratum (406) has only one PSU at stage 1
+> options(survey.lonely.psu="remove")
+> svymean(~api00,ds1)
+ mean SE
+api00 645.14 5.8909
+Warning message:
+In onestrat(x[index, , drop = FALSE], clusters[index], nPSU[index][1], :
+ Stratum (406) has only one PSU at stage 1
+> options(survey.lonely.psu="certainty")
+> svymean(~api00,ds1)
+ mean SE
+api00 645.14 5.8909
+Warning message:
+In onestrat(x[index, , drop = FALSE], clusters[index], nPSU[index][1], :
+ Stratum (406) has only one PSU at stage 1
+> options(survey.lonely.psu="adjust")
+> svymean(~api00,ds1)
+ mean SE
+api00 645.14 5.9119
+Warning message:
+In onestrat(x[index, , drop = FALSE], clusters[index], nPSU[index][1], :
+ Stratum (406) has only one PSU at stage 1
+> options(survey.lonely.psu="average")
+> svymean(~api00,ds1)
+ mean SE
+api00 645.14 6.0914
+Warning message:
+In onestrat(x[index, , drop = FALSE], clusters[index], nPSU[index][1], :
+ Stratum (406) has only one PSU at stage 1
+>
+> proc.time()
+ user system elapsed
+ 0.449 0.028 0.484
diff --git a/tests/multistage.R b/tests/multistage.R
new file mode 100644
index 0000000..7faf067
--- /dev/null
+++ b/tests/multistage.R
@@ -0,0 +1,6 @@
+##
+## Check that multistage samples still work
+##
+library(survey)
+example(mu284)
+
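For a two-stage design with fpc at both stages, the variance is a between-PSU component plus a within-PSU component for each sampled PSU. A minimal sketch contrasting the full two-stage variance with the usual first-stage with-replacement approximation (which keeps the weights but ignores stage two):

    library(survey)
    data(mu284)
    dmu284 <- svydesign(id=~id1+id2, fpc=~n1+n2, data=mu284)
    SE(svytotal(~y1, dmu284))
    ## single-stage approximation: same weights, first-stage clusters only
    dmu284.wr <- svydesign(id=~id1, weights=weights(dmu284), data=mu284)
    SE(svytotal(~y1, dmu284.wr))
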
diff --git a/tests/multistage.Rout.save b/tests/multistage.Rout.save
new file mode 100644
index 0000000..35a1d08
--- /dev/null
+++ b/tests/multistage.Rout.save
@@ -0,0 +1,49 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> ##
+> ## Check that multistage samples still work
+> ##
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> example(mu284)
+
+mu284> data(mu284)
+
+mu284> (dmu284<-svydesign(id=~id1+id2,fpc=~n1+n2, data=mu284))
+2 - level Cluster Sampling design
+With (5, 15) clusters.
+svydesign(id = ~id1 + id2, fpc = ~n1 + n2, data = mu284)
+
+mu284> (ytotal<-svytotal(~y1, dmu284))
+ total SE
+y1 15080 2274.3
+
+mu284> vcov(ytotal)
+ y1
+y1 5172234
+>
+>
+> proc.time()
+ user system elapsed
+ 0.167 0.024 0.202
diff --git a/tests/nwtco-subcohort.rda b/tests/nwtco-subcohort.rda
new file mode 100644
index 0000000..3be00e0
Binary files /dev/null and b/tests/nwtco-subcohort.rda differ
diff --git a/tests/nwts-cch.R b/tests/nwts-cch.R
new file mode 100644
index 0000000..4ab26a1
--- /dev/null
+++ b/tests/nwts-cch.R
@@ -0,0 +1,31 @@
+library(survey)
+library(survival)
+data(nwtco)
+
+ntwco<-subset(nwtco, !is.na(edrel))
+
+load("nwtco-subcohort.rda")
+nwtco$subcohort<-subcohort
+
+d_BorganII <- twophase(id=list(~seqno,~seqno),
+ strata=list(NULL,~interaction(instit,rel)),
+ data=nwtco, subset=~I(rel |subcohort))
+
+##Coefficient results same as Splus with code from
+## http://faculty.washington.edu/norm/software.html
+## SE slightly larger due to using sandwich variance.
+
+svycoxph(Surv(edrel, rel)~factor(stage)+factor(histol)+I(age/12), design=d_BorganII)
+
+##
+## This gives higher standard errors. calibrate() does not recompute the
+## finite population correction if a calibration variable happens to predict
+## sampling perfectly. It probably should.
+##
+d_BorganIIps<-calibrate(twophase(id=list(~seqno,~seqno),
+ strata=list(NULL,~rel),
+ data=nwtco, subset=~I(rel |subcohort)),
+ phase=2, formula=~interaction(instit,rel),
+ epsilon=1e-10)
+
+svycoxph(Surv(edrel, rel)~factor(stage)+factor(histol)+I(age/12), design=d_BorganIIps)
diff --git a/tests/nwts-cch.Rout.save b/tests/nwts-cch.Rout.save
new file mode 100644
index 0000000..061ad37
--- /dev/null
+++ b/tests/nwts-cch.Rout.save
@@ -0,0 +1,88 @@
+
+R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
+Copyright (C) 2016 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.4.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+Loading required package: grid
+Loading required package: Matrix
+Loading required package: survival
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> library(survival)
+> data(nwtco)
+>
+> ntwco<-subset(nwtco, !is.na(edrel))
+>
+> load("nwtco-subcohort.rda")
+> nwtco$subcohort<-subcohort
+>
+> d_BorganII <- twophase(id=list(~seqno,~seqno),
++ strata=list(NULL,~interaction(instit,rel)),
++ data=nwtco, subset=~I(rel |subcohort))
+>
+> ##Coefficient results same as Splus with code from
+> ## http://faculty.washington.edu/norm/software.html
+> ## SE slightly larger due to using sandwich variance.
+>
+> svycoxph(Surv(edrel, rel)~factor(stage)+factor(histol)+I(age/12), design=d_BorganII)
+Call:
+svycoxph(formula = Surv(edrel, rel) ~ factor(stage) + factor(histol) +
+ I(age/12), design = d_BorganII)
+
+ coef exp(coef) se(coef) z p
+factor(stage)2 0.4629 1.5886 0.1809 2.56 0.0105
+factor(stage)3 0.5831 1.7916 0.1785 3.27 0.0011
+factor(stage)4 1.0597 2.8854 0.2052 5.16 2.4e-07
+factor(histol)2 1.5974 4.9403 0.1334 11.97 < 2e-16
+I(age/12) 0.0299 1.0304 0.0334 0.90 0.3697
+
+Likelihood ratio test= on 5 df, p=
+n= 1062, number of events= 571
+>
+> ##
+> ## This gives higher standard errors. calibrate() does not recompute the
+> ## finite population correction if a calibration variable happens to predict
+> ## sampling perfectly. It probably should.
+> ##
+> d_BorganIIps<-calibrate(twophase(id=list(~seqno,~seqno),
++ strata=list(NULL,~rel),
++ data=nwtco, subset=~I(rel |subcohort)),
++ phase=2, formula=~interaction(instit,rel),
++ epsilon=1e-10)
+>
+> svycoxph(Surv(edrel, rel)~factor(stage)+factor(histol)+I(age/12), design=d_BorganIIps)
+Call:
+svycoxph(formula = Surv(edrel, rel) ~ factor(stage) + factor(histol) +
+ I(age/12), design = d_BorganIIps)
+
+ coef exp(coef) se(coef) z p
+factor(stage)2 0.4629 1.5886 0.1808 2.56 0.0104
+factor(stage)3 0.5831 1.7916 0.1784 3.27 0.0011
+factor(stage)4 1.0597 2.8854 0.2051 5.17 2.4e-07
+factor(histol)2 1.5974 4.9403 0.1304 12.25 < 2e-16
+I(age/12) 0.0299 1.0304 0.0333 0.90 0.3688
+
+Likelihood ratio test= on 5 df, p=
+n= 1062, number of events= 571
+>
+> proc.time()
+ user system elapsed
+ 1.823 0.105 1.962
diff --git a/tests/nwts.R b/tests/nwts.R
new file mode 100644
index 0000000..901cee6
--- /dev/null
+++ b/tests/nwts.R
@@ -0,0 +1,39 @@
+
+## examples from Breslow & Chatterjee: Applied Statistics 1999 No. 4, p458
+## data from Norman Breslow's web page.
+library(survey)
+load("nwts.rda")
+nwtsnb<-nwts
+nwtsnb$case<-nwts$case-nwtsb$case
+nwtsnb$control<-nwts$control-nwtsb$control
+
+a<-rbind(nwtsb,nwtsnb)
+a$in.ccs<-rep(c(TRUE,FALSE),each=16)
+
+b<-rbind(a,a)
+b$rel<-rep(c(1,0),each=32)
+b$n<-ifelse(b$rel,b$case,b$control)
+
+index<-rep(1:64,b$n)
+
+nwt.exp<-b[index,c(1:3,6,7)]
+nwt.exp$id<-1:4088
+
+dccs2<-twophase(id=list(~id,~id),subset=~in.ccs,
+ strata=list(NULL,~interaction(instit,rel)),data=nwt.exp)
+
+dccs8<-twophase(id=list(~id,~id),subset=~in.ccs,
+ strata=list(NULL,~interaction(instit,stage,rel)),data=nwt.exp)
+
+gccs8<-calibrate(dccs2,phase=2,formula=~interaction(instit,stage,rel))
+
+summary(svyglm(rel~factor(stage)*factor(histol),family=quasibinomial,design=dccs2))
+summary(svyglm(rel~factor(stage)*factor(histol),family=quasibinomial,design=dccs8))
+summary(svyglm(rel~factor(stage)*factor(histol),family=quasibinomial,design=gccs8))
+
+## check subsets of calibrated designs.
+summary(svyglm(rel~factor(stage),
+ family=quasibinomial,design=subset(dccs8,histol==1)))
+summary(svyglm(rel~factor(stage),
+ family=quasibinomial,design=subset(gccs8,histol==1)))
+
diff --git a/tests/nwts.Rout.save b/tests/nwts.Rout.save
new file mode 100644
index 0000000..30ad4fa
--- /dev/null
+++ b/tests/nwts.Rout.save
@@ -0,0 +1,202 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+>
+> ## examples from Breslow & Chatterjee: Applied Statistics 1999 No. 4, p458
+> ## data from Norman Breslow's web page.
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> load("nwts.rda")
+> nwtsnb<-nwts
+> nwtsnb$case<-nwts$case-nwtsb$case
+> nwtsnb$control<-nwts$control-nwtsb$control
+>
+> a<-rbind(nwtsb,nwtsnb)
+> a$in.ccs<-rep(c(TRUE,FALSE),each=16)
+>
+> b<-rbind(a,a)
+> b$rel<-rep(c(1,0),each=32)
+> b$n<-ifelse(b$rel,b$case,b$control)
+>
+> index<-rep(1:64,b$n)
+>
+> nwt.exp<-b[index,c(1:3,6,7)]
+> nwt.exp$id<-1:4088
+>
+> dccs2<-twophase(id=list(~id,~id),subset=~in.ccs,
++ strata=list(NULL,~interaction(instit,rel)),data=nwt.exp)
+>
+> dccs8<-twophase(id=list(~id,~id),subset=~in.ccs,
++ strata=list(NULL,~interaction(instit,stage,rel)),data=nwt.exp)
+>
+> gccs8<-calibrate(dccs2,phase=2,formula=~interaction(instit,stage,rel))
+>
+> summary(svyglm(rel~factor(stage)*factor(histol),family=quasibinomial,design=dccs2))
+
+Call:
+svyglm(formula = rel ~ factor(stage) * factor(histol), family = quasibinomial,
+ design = dccs2)
+
+Survey design:
+twophase2(id = id, strata = strata, probs = probs, fpc = fpc,
+ subset = subset, data = data)
+
+Coefficients:
+ Estimate Std. Error t value Pr(>|t|)
+(Intercept) -2.5701 0.1288 -19.955 < 2e-16 ***
+factor(stage)2 0.5482 0.1979 2.769 0.005708 **
+factor(stage)3 0.4791 0.2032 2.359 0.018515 *
+factor(stage)4 1.0037 0.2592 3.872 0.000114 ***
+factor(histol)2 1.3505 0.3107 4.346 1.51e-05 ***
+factor(stage)2:factor(histol)2 0.1152 0.4410 0.261 0.793876
+factor(stage)3:factor(histol)2 0.5066 0.4241 1.194 0.232548
+factor(stage)4:factor(histol)2 0.9785 0.6214 1.575 0.115615
+---
+Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+(Dispersion parameter for quasibinomial family taken to be 1.000876)
+
+Number of Fisher Scoring iterations: 4
+
+> summary(svyglm(rel~factor(stage)*factor(histol),family=quasibinomial,design=dccs8))
+
+Call:
+svyglm(formula = rel ~ factor(stage) * factor(histol), family = quasibinomial,
+ design = dccs8)
+
+Survey design:
+twophase2(id = id, strata = strata, probs = probs, fpc = fpc,
+ subset = subset, data = data)
+
+Coefficients:
+ Estimate Std. Error t value Pr(>|t|)
+(Intercept) -2.71604 0.10827 -25.085 < 2e-16 ***
+factor(stage)2 0.78141 0.14726 5.306 1.35e-07 ***
+factor(stage)3 0.80093 0.15250 5.252 1.80e-07 ***
+factor(stage)4 1.07293 0.17817 6.022 2.33e-09 ***
+factor(histol)2 1.45836 0.31780 4.589 4.96e-06 ***
+factor(stage)2:factor(histol)2 -0.04743 0.43495 -0.109 0.913
+factor(stage)3:factor(histol)2 0.28064 0.41298 0.680 0.497
+factor(stage)4:factor(histol)2 0.90983 0.63774 1.427 0.154
+---
+Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+(Dispersion parameter for quasibinomial family taken to be 1.000876)
+
+Number of Fisher Scoring iterations: 4
+
+> summary(svyglm(rel~factor(stage)*factor(histol),family=quasibinomial,design=gccs8))
+
+Call:
+svyglm(formula = rel ~ factor(stage) * factor(histol), family = quasibinomial,
+ design = gccs8)
+
+Survey design:
+calibrate(dccs2, phase = 2, formula = ~interaction(instit, stage,
+ rel))
+
+Coefficients:
+ Estimate Std. Error t value Pr(>|t|)
+(Intercept) -2.71604 0.10878 -24.968 < 2e-16 ***
+factor(stage)2 0.78141 0.14729 5.305 1.35e-07 ***
+factor(stage)3 0.80093 0.15212 5.265 1.68e-07 ***
+factor(stage)4 1.07293 0.17905 5.993 2.77e-09 ***
+factor(histol)2 1.45836 0.31757 4.592 4.88e-06 ***
+factor(stage)2:factor(histol)2 -0.04743 0.43432 -0.109 0.913
+factor(stage)3:factor(histol)2 0.28064 0.41231 0.681 0.496
+factor(stage)4:factor(histol)2 0.90983 0.63187 1.440 0.150
+---
+Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+(Dispersion parameter for quasibinomial family taken to be 1.000876)
+
+Number of Fisher Scoring iterations: 4
+
+>
+> ## check subsets of calibrated designs.
+> summary(svyglm(rel~factor(stage),
++ family=quasibinomial,design=subset(dccs8,histol==1)))
+
+Call:
+svyglm(formula = rel ~ factor(stage), family = quasibinomial,
+ design = subset(dccs8, histol == 1))
+
+Survey design:
+subset(dccs8, histol == 1)
+
+Coefficients:
+ Estimate Std. Error t value Pr(>|t|)
+(Intercept) -2.7160 0.1083 -25.085 < 2e-16 ***
+factor(stage)2 0.7814 0.1473 5.306 1.48e-07 ***
+factor(stage)3 0.8009 0.1525 5.252 1.97e-07 ***
+factor(stage)4 1.0729 0.1782 6.022 2.73e-09 ***
+---
+Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+(Dispersion parameter for quasibinomial family taken to be 1.001333)
+
+Number of Fisher Scoring iterations: 4
+
+Warning messages:
+1: In `[.twophase2`(x, r, ) : 1 strata have only one PSU in this subset.
+2: In summary.glm(g) :
+ observations with zero weight not used for calculating dispersion
+3: In summary.glm(glm.object) :
+ observations with zero weight not used for calculating dispersion
+4: In `[.twophase2`(design, nas == 0, ) :
+ 1 strata have only one PSU in this subset.
+5: In `[.twophase2`(design, nas == 0, ) :
+ 1 strata have only one PSU in this subset.
+> summary(svyglm(rel~factor(stage),
++ family=quasibinomial,design=subset(gccs8,histol==1)))
+
+Call:
+svyglm(formula = rel ~ factor(stage), family = quasibinomial,
+ design = subset(gccs8, histol == 1))
+
+Survey design:
+subset(gccs8, histol == 1)
+
+Coefficients:
+ Estimate Std. Error t value Pr(>|t|)
+(Intercept) -2.7160 0.1082 -25.105 < 2e-16 ***
+factor(stage)2 0.7814 0.1457 5.363 1.10e-07 ***
+factor(stage)3 0.8009 0.1504 5.324 1.34e-07 ***
+factor(stage)4 1.0729 0.1759 6.101 1.70e-09 ***
+---
+Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+(Dispersion parameter for quasibinomial family taken to be 1.001333)
+
+Number of Fisher Scoring iterations: 4
+
+Warning messages:
+1: In summary.glm(g) :
+ observations with zero weight not used for calculating dispersion
+2: In summary.glm(glm.object) :
+ observations with zero weight not used for calculating dispersion
+>
+>
+> proc.time()
+ user system elapsed
+ 2.841 0.113 2.963
diff --git a/tests/nwts.rda b/tests/nwts.rda
new file mode 100644
index 0000000..6711361
Binary files /dev/null and b/tests/nwts.rda differ
diff --git a/tests/pps.R b/tests/pps.R
new file mode 100644
index 0000000..e9be41e
--- /dev/null
+++ b/tests/pps.R
@@ -0,0 +1,46 @@
+library(survey)
+data(election)
+
+dpps<- svydesign(id=~1, weights=~wt, fpc=~p, data=election_pps, pps="brewer")
+dppswr <-svydesign(id=~1, weights=~wt, data=election_pps)
+svytotal(~Bush+Kerry+Nader, dpps)
+svytotal(~Bush+Kerry+Nader, dppswr)
+
+##subsets
+svytotal(~Bush+Kerry+Nader, subset(dpps, Nader>0))
+
+##multistage: should agree with STRS analysis
+data(api)
+dclus2<-svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
+dclus2pps<-svydesign(id = ~dnum + snum, fpc = ~I(40/fpc1) + I(pmin(1,5/fpc2)), data = apiclus2)
+
+all.equal(svytotal(~sch.wide,dclus2), svytotal(~sch.wide,dclus2pps))
+all.equal(svymean(~sch.wide,dclus2), svymean(~sch.wide,dclus2pps))
+all.equal(svytotal(~enroll,dclus2), svytotal(~enroll,dclus2pps))
+
+## the new without-replacement methods
+data(election)
+dpps_br<- svydesign(id=~1, fpc=~p, data=election_pps, pps="brewer")
+dpps_ov<- svydesign(id=~1, fpc=~p, data=election_pps, pps="overton")
+dpps_hr<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40))
+dpps_hr1<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR())
+dpps_ht<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob))
+## Yates-Grundy type
+dpps_yg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob),variance="YG")
+dpps_hryg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40),variance="YG")
+
+## The with-replacement approximation
+svytotal(~Bush+Kerry+Nader, dpps_ht)
+svytotal(~Bush+Kerry+Nader, dpps_yg)
+svytotal(~Bush+Kerry+Nader, dpps_hr)
+svytotal(~Bush+Kerry+Nader, dpps_hryg)
+svytotal(~Bush+Kerry+Nader, dpps_hr1)
+svytotal(~Bush+Kerry+Nader, dpps_br)
+svytotal(~Bush+Kerry+Nader, dpps_ov)
+
+## subsets
+svytotal(~Bush+Kerry+Nader, subset(dpps_ht, Nader>0))
+svytotal(~Bush+Kerry+Nader, subset(dpps_hryg, Nader>0))
+
+## counts
+svyby(~Bush+Kerry+Nader,~I(Nader>0), unwtd.count,design=dpps_ht)
diff --git a/tests/pps.Rout.save b/tests/pps.Rout.save
new file mode 100644
index 0000000..872ff26
--- /dev/null
+++ b/tests/pps.Rout.save
@@ -0,0 +1,128 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> data(election)
+>
+> dpps<- svydesign(id=~1, weights=~wt, fpc=~p, data=election_pps, pps="brewer")
+> dppswr <-svydesign(id=~1, weights=~wt, data=election_pps)
+> svytotal(~Bush+Kerry+Nader, dpps)
+ total SE
+Bush 64518472 2447629
+Kerry 51202102 2450787
+Nader 478530 102420
+> svytotal(~Bush+Kerry+Nader, dppswr)
+ total SE
+Bush 64518472 2671455
+Kerry 51202102 2679433
+Nader 478530 105303
+>
+> ##subsets
+> svytotal(~Bush+Kerry+Nader, subset(dpps, Nader>0))
+ total SE
+Bush 34944285 5399833
+Kerry 25581714 4028434
+Nader 478530 102420
+>
+> ##multistage: should agree with STRS analysis
+> data(api)
+> dclus2<-svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
+> dclus2pps<-svydesign(id = ~dnum + snum, fpc = ~I(40/fpc1) + I(pmin(1,5/fpc2)), data = apiclus2)
+>
+> all.equal(svytotal(~sch.wide,dclus2), svytotal(~sch.wide,dclus2pps))
+[1] TRUE
+> all.equal(svymean(~sch.wide,dclus2), svymean(~sch.wide,dclus2pps))
+[1] TRUE
+> all.equal(svytotal(~enroll,dclus2), svytotal(~enroll,dclus2pps))
+[1] TRUE
+>
+> ## the new without-replacement methods
+> data(election)
+> dpps_br<- svydesign(id=~1, fpc=~p, data=election_pps, pps="brewer")
+> dpps_ov<- svydesign(id=~1, fpc=~p, data=election_pps, pps="overton")
+> dpps_hr<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40))
+> dpps_hr1<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR())
+> dpps_ht<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob))
+> ## Yates-Grundy type
+> dpps_yg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob),variance="YG")
+> dpps_hryg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40),variance="YG")
+>
+> ## The with-replacement approximation
+> svytotal(~Bush+Kerry+Nader, dpps_ht)
+ total SE
+Bush 64518472 2604404
+Kerry 51202102 2523712
+Nader 478530 102326
+> svytotal(~Bush+Kerry+Nader, dpps_yg)
+ total SE
+Bush 64518472 2406526
+Kerry 51202102 2408091
+Nader 478530 101664
+> svytotal(~Bush+Kerry+Nader, dpps_hr)
+ total SE
+Bush 64518472 2624662
+Kerry 51202102 2525222
+Nader 478530 102793
+> svytotal(~Bush+Kerry+Nader, dpps_hryg)
+ total SE
+Bush 64518472 2436738
+Kerry 51202102 2439845
+Nader 478530 102016
+> svytotal(~Bush+Kerry+Nader, dpps_hr1)
+ total SE
+Bush 64518472 2472753
+Kerry 51202102 2426842
+Nader 478530 102595
+> svytotal(~Bush+Kerry+Nader, dpps_br)
+ total SE
+Bush 64518472 2447629
+Kerry 51202102 2450787
+Nader 478530 102420
+> svytotal(~Bush+Kerry+Nader, dpps_ov)
+ total SE
+Bush 64518472 2939608
+Kerry 51202102 1964632
+Nader 478530 104373
+>
+> ## subsets
+> svytotal(~Bush+Kerry+Nader, subset(dpps_ht, Nader>0))
+ total SE
+Bush 34944285 5406348
+Kerry 25581714 4047741
+Nader 478530 102326
+> svytotal(~Bush+Kerry+Nader, subset(dpps_hryg, Nader>0))
+ total SE
+Bush 34944285 5377659
+Kerry 25581714 4010908
+Nader 478530 102016
+>
+> ## counts
+> svyby(~Bush+Kerry+Nader,~I(Nader>0), unwtd.count,design=dpps_ht)
+ I(Nader > 0) counts se
+FALSE FALSE 19 0
+TRUE TRUE 21 0
+>
+> proc.time()
+ user system elapsed
+ 1.946 0.058 2.017
diff --git a/tests/quantile.R b/tests/quantile.R
new file mode 100644
index 0000000..ba34f54
--- /dev/null
+++ b/tests/quantile.R
@@ -0,0 +1,19 @@
+library(survey)
+set.seed(42)
+
+df<-data.frame(x=exp(rnorm(1000)))
+df$y<-round(df$x,1)
+ddf<-svydesign(id=~1,data=df)
+rdf<-as.svrepdesign(ddf)
+
+SE(svyquantile(~x,ddf, c(0.01,0.1,0.5,0.9,0.99),ci=TRUE))
+
+SE(svyquantile(~x,rdf, c(0.01,0.1,0.5,0.9,0.99),ci=TRUE))
+
+
+svyquantile(~y,ddf, c(0.01,0.1,0.5,0.9,0.99),ci=TRUE,ties="rounded",interval.type="betaWald")
+
+svyquantile(~y,rdf, c(0.01,0.1,0.5,0.9,0.99),ci=TRUE)
+
+
+
diff --git a/tests/quantile.Rout.save b/tests/quantile.Rout.save
new file mode 100644
index 0000000..730f239
--- /dev/null
+++ b/tests/quantile.Rout.save
@@ -0,0 +1,84 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> set.seed(42)
+>
+> df<-data.frame(x=exp(rnorm(1000)))
+> df$y<-round(df$x,1)
+> ddf<-svydesign(id=~1,data=df)
+Warning message:
+In svydesign.default(id = ~1, data = df) :
+ No weights or probabilities supplied, assuming equal probability
+> rdf<-as.svrepdesign(ddf)
+>
+> SE(svyquantile(~x,ddf, c(0.01,0.1,0.5,0.9,0.99),ci=TRUE))
+ 0.01 0.1 0.5 0.9 0.99
+0.01545209 0.01265608 0.03388011 0.16145776 2.10061576
+>
+> SE(svyquantile(~x,rdf, c(0.01,0.1,0.5,0.9,0.99),ci=TRUE))
+ x
+q0.01 0.01534861
+q0.1 0.01514945
+q0.5 0.03394446
+q0.9 0.16409412
+q0.99 1.86410482
+>
+>
+> svyquantile(~y,ddf, c(0.01,0.1,0.5,0.9,0.99),ci=TRUE,ties="rounded",interval.type="betaWald")
+$quantiles
+ 0.01 0.1 0.5 0.9 0.99
+y 0.02352941 0.2230769 0.9340909 3.55 9.4
+
+$CIs
+, , y
+
+ 0.01 0.1 0.5 0.9 0.99
+(lower 0.01594200 0.2020115 0.8578143 3.207543 7.568142
+upper) 0.03287947 0.2467045 1.0059815 3.862241 14.978632
+
+
+>
+> svyquantile(~y,rdf, c(0.01,0.1,0.5,0.9,0.99),ci=TRUE)
+Statistic:
+ y
+q0.01 0.1
+q0.1 0.3
+q0.5 1.0
+q0.9 3.6
+q0.99 9.4
+SE:
+ y
+q0.01 0.0250000
+q0.1 0.0250000
+q0.5 0.0250000
+q0.9 0.1516809
+q0.99 1.8599967
+>
+>
+>
+>
+> proc.time()
+ user system elapsed
+ 0.917 0.047 0.968
diff --git a/tests/rakecheck.R b/tests/rakecheck.R
new file mode 100644
index 0000000..1285a2e
--- /dev/null
+++ b/tests/rakecheck.R
@@ -0,0 +1,55 @@
+library(survey)
+
+data(api)
+dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+rclus1 <- as.svrepdesign(dclus1)
+
+## population marginal totals for each stratum
+pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
+pop.schwide <- data.frame(sch.wide=c("No","Yes"), Freq=c(1072,5122))
+
+rclus1r <- rake(rclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))
+
+svymean(~api00, rclus1r)
+svytotal(~enroll, rclus1r)
+
+ff<-~stype+sch.wide
+poptotals<-colSums(model.matrix(ff,model.frame(ff,apipop)))
+rclus1g<-calibrate(rclus1, ~stype+sch.wide, poptotals,calfun="raking")
+
+svymean(~api00,rclus1g)
+svytotal(~enroll,rclus1g)
+
+summary(weights(rclus1g)/weights(rclus1r))
+
+
+## Do it for a design without replicate weights
+dclus1r<-rake(dclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))
+
+svymean(~api00, dclus1r)
+svytotal(~enroll, dclus1r)
+
+dclus1g<-calibrate(dclus1, ~stype+sch.wide, poptotals,calfun="raking")
+
+svymean(~api00,dclus1g)
+svytotal(~enroll,dclus1g)
+
+summary(weights(dclus1g)/weights(dclus1r))
+
+
+
+## Example of raking with partial joint distributions
+pop.table <- xtabs(~stype+sch.wide,apipop)
+pop.imp<-data.frame(comp.imp=c("No","Yes"),Freq=c(1712,4482))
+dclus1r2<-rake(dclus1, list(~stype+sch.wide, ~comp.imp),
+ list(pop.table, pop.imp))
+svymean(~api00, dclus1r2)
+
+ff1 <-~stype*sch.wide+comp.imp
+
+poptotals1<-colSums(model.matrix(ff1,model.frame(ff1,apipop)))
+dclus1g2<-calibrate(dclus1, ~stype*sch.wide+comp.imp, poptotals1, calfun="raking")
+
+svymean(~api00, dclus1g2)
+
+summary(weights(dclus1r2)/weights(dclus1g2))
diff --git a/tests/rakecheck.Rout.save b/tests/rakecheck.Rout.save
new file mode 100644
index 0000000..bfc6144
--- /dev/null
+++ b/tests/rakecheck.Rout.save
@@ -0,0 +1,132 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+>
+> data(api)
+> dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+> rclus1 <- as.svrepdesign(dclus1)
+>
+> ## population marginal totals for each stratum
+> pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
+> pop.schwide <- data.frame(sch.wide=c("No","Yes"), Freq=c(1072,5122))
+>
+> rclus1r <- rake(rclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))
+>
+> svymean(~api00, rclus1r)
+ mean SE
+api00 641.23 26.873
+> svytotal(~enroll, rclus1r)
+ total SE
+enroll 3647300 463511
+>
+> ff<-~stype+sch.wide
+> poptotals<-colSums(model.matrix(ff,model.frame(ff,apipop)))
+> rclus1g<-calibrate(rclus1, ~stype+sch.wide, poptotals,calfun="raking")
+Loading required package: MASS
+>
+> svymean(~api00,rclus1g)
+ mean SE
+api00 641.23 26.874
+> svytotal(~enroll,rclus1g)
+ total SE
+enroll 3647280 463582
+>
+> summary(weights(rclus1g)/weights(rclus1r))
+ V1 V2 V3 V4 V5 V6
+ Min. :1 Min. :1 Min. :1 Min. :1 Min. :1 Min. :1
+ 1st Qu.:1 1st Qu.:1 1st Qu.:1 1st Qu.:1 1st Qu.:1 1st Qu.:1
+ Median :1 Median :1 Median :1 Median :1 Median :1 Median :1
+ Mean :1 Mean :1 Mean :1 Mean :1 Mean :1 Mean :1
+ 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1
+ Max. :1 Max. :1 Max. :1 Max. :1 Max. :1 Max. :1
+ NA's :11 NA's :4 NA's :2 NA's :13 NA's :2 NA's :4
+ V7 V8 V9 V10 V11
+ Min. :1 Min. :1 Min. :1 Min. :1 Min. :1
+ 1st Qu.:1 1st Qu.:1 1st Qu.:1 1st Qu.:1 1st Qu.:1
+ Median :1 Median :1 Median :1 Median :1 Median :1
+ Mean :1 Mean :1 Mean :1 Mean :1 Mean :1
+ 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1
+ Max. :1 Max. :1 Max. :1 Max. :1 Max. :1
+ NA's :4 NA's :16 NA's :9 NA's :34 NA's :21
+ V12 V13 V14 V15
+ Min. :0.9997 Min. :1 Min. :1 Min. :1
+ 1st Qu.:1.0001 1st Qu.:1 1st Qu.:1 1st Qu.:1
+ Median :1.0001 Median :1 Median :1 Median :1
+ Mean :1.0000 Mean :1 Mean :1 Mean :1
+ 3rd Qu.:1.0001 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1
+ Max. :1.0002 Max. :1 Max. :1 Max. :1
+ NA's :37 NA's :13 NA's :1 NA's :12
+>
+>
+> ## Do it for a design without replicate weights
+> dclus1r<-rake(dclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))
+>
+> svymean(~api00, dclus1r)
+ mean SE
+api00 641.23 23.704
+> svytotal(~enroll, dclus1r)
+ total SE
+enroll 3647300 400603
+>
+> dclus1g<-calibrate(dclus1, ~stype+sch.wide, poptotals,calfun="raking")
+>
+> svymean(~api00,dclus1g)
+ mean SE
+api00 641.23 23.704
+> svytotal(~enroll,dclus1g)
+ total SE
+enroll 3647280 400603
+>
+> summary(weights(dclus1g)/weights(dclus1r))
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+ 1 1 1 1 1 1
+>
+>
+>
+> ## Example of raking with partial joint distributions
+> pop.table <- xtabs(~stype+sch.wide,apipop)
+> pop.imp<-data.frame(comp.imp=c("No","Yes"),Freq=c(1712,4482))
+> dclus1r2<-rake(dclus1, list(~stype+sch.wide, ~comp.imp),
++ list(pop.table, pop.imp))
+> svymean(~api00, dclus1r2)
+ mean SE
+api00 642.62 22.732
+>
+> ff1 <-~stype*sch.wide+comp.imp
+>
+> poptotals1<-colSums(model.matrix(ff1,model.frame(ff1,apipop)))
+> dclus1g2<-calibrate(dclus1, ~stype*sch.wide+comp.imp, poptotals1, calfun="raking")
+>
+> svymean(~api00, dclus1g2)
+ mean SE
+api00 642.61 22.731
+>
+> summary(weights(dclus1r2)/weights(dclus1g2))
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+ 0.999 1.000 1.000 1.000 1.000 1.002
+>
+> proc.time()
+ user system elapsed
+ 0.459 0.032 0.499
diff --git a/tests/raowuboot.R b/tests/raowuboot.R
new file mode 100644
index 0000000..d2a15f6
--- /dev/null
+++ b/tests/raowuboot.R
@@ -0,0 +1,4 @@
+## regression test for bug reported by Richard Valliant
+library(survey)
+s<-subbootweights(c(1,1),1:2, 50)
+stopifnot(all(s$repweights$weights %in% c(0,2)))
diff --git a/tests/raowuboot.Rout.save b/tests/raowuboot.Rout.save
new file mode 100644
index 0000000..6be9d32
--- /dev/null
+++ b/tests/raowuboot.Rout.save
@@ -0,0 +1,32 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> ## regression test for bug reported by Richard Valliant
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> s<-subbootweights(c(1,1),1:2, 50)
+> stopifnot(all(s$repweights$weights %in% c(0,2)))
+>
+> proc.time()
+ user system elapsed
+ 0.140 0.021 0.165
diff --git a/tests/regpredict.R b/tests/regpredict.R
new file mode 100644
index 0000000..4a172c1
--- /dev/null
+++ b/tests/regpredict.R
@@ -0,0 +1,39 @@
+library(survey)
+data(api)
+dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+
+
+## regression estimator of total, three ways
+pop<-data.frame(enroll=sum(apipop$enroll, na.rm=TRUE))
+npop <- sum(!is.na(apipop$enroll))
+
+api.reg <- svyglm(api.stu~enroll, design=dstrat)
+a <- predict(api.reg, newdata=pop, total=npop)
+b <- svytotal(~api.stu, calibrate(dstrat, ~enroll, pop=c(npop, pop$enroll)))
+
+all.equal(as.vector(coef(a)),as.vector(coef(b)))
+all.equal(as.vector(SE(a)), as.vector(SE(b)))
+if(!is.null(getOption("DEBUG"))){ ## uses 6194x6194 matrix
+ d <- predict(api.reg, newdata=na.omit(apipop[,"enroll",drop=FALSE]))
+ all.equal(as.vector(coef(a)), as.vector(sum(coef(d))))
+ all.equal(as.vector(SE(a)), as.vector(sqrt(sum(vcov(d)))))
+}
+
+## classical ratio estimator, four ways.
+api.reg2 <- svyglm(api.stu~enroll-1, design=dstrat,
+ family=quasi(link="identity", var="mu"))
+
+a <- predict(api.reg2, newdata=pop, total=npop)
+b <- svytotal(~api.stu,
+ calibrate(dstrat, ~enroll-1, pop= pop$enroll, variance=2))
+e <- predict(svyratio(~api.stu, ~enroll, dstrat),total=pop$enroll)
+
+all.equal(as.vector(coef(a)),as.vector(coef(b)))
+all.equal(as.vector(SE(a)), as.vector(SE(b)))
+all.equal(as.vector(coef(a)),as.vector(e$total))
+all.equal(as.vector(SE(a)), as.vector(e$se))
+if(!is.null(getOption("DEBUG"))){## uses 6194x6194 matrix
+ d <- predict(api.reg2, newdata=na.omit(apipop[,"enroll",drop=FALSE]))
+ all.equal(as.vector(coef(a)), as.vector(sum(coef(d))))
+ all.equal(as.vector(SE(a)), as.vector(sqrt(sum(vcov(d)))))
+}
diff --git a/tests/regpredict.Rout.save b/tests/regpredict.Rout.save
new file mode 100644
index 0000000..88fa1ce
--- /dev/null
+++ b/tests/regpredict.Rout.save
@@ -0,0 +1,73 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> data(api)
+> dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+>
+>
+> ## regression estimator of total, three ways
+> pop<-data.frame(enroll=sum(apipop$enroll, na.rm=TRUE))
+> npop <- sum(!is.na(apipop$enroll))
+>
+> api.reg <- svyglm(api.stu~enroll, design=dstrat)
+> a <- predict(api.reg, newdata=pop, total=npop)
+> b <- svytotal(~api.stu, calibrate(dstrat, ~enroll, pop=c(npop, pop$enroll)))
+>
+> all.equal(as.vector(coef(a)),as.vector(coef(b)))
+[1] TRUE
+> all.equal(as.vector(SE(a)), as.vector(SE(b)))
+[1] TRUE
+> if(!is.null(getOption("DEBUG"))){ ## uses 6194x6194 matrix
++ d <- predict(api.reg, newdata=na.omit(apipop[,"enroll",drop=FALSE]))
++ all.equal(as.vector(coef(a)), as.vector(sum(coef(d))))
++ all.equal(as.vector(SE(a)), as.vector(sqrt(sum(vcov(d)))))
++ }
+>
+> ## classical ratio estimator, four ways.
+> api.reg2 <- svyglm(api.stu~enroll-1, design=dstrat,
++ family=quasi(link="identity", var="mu"))
+>
+> a <- predict(api.reg2, newdata=pop, total=npop)
+> b <- svytotal(~api.stu,
++ calibrate(dstrat, ~enroll-1, pop= pop$enroll, variance=2))
+> e <- predict(svyratio(~api.stu, ~enroll, dstrat),total=pop$enroll)
+>
+> all.equal(as.vector(coef(a)),as.vector(coef(b)))
+[1] TRUE
+> all.equal(as.vector(SE(a)), as.vector(SE(b)))
+[1] TRUE
+> all.equal(as.vector(coef(a)),as.vector(e$total))
+[1] TRUE
+> all.equal(as.vector(SE(a)), as.vector(e$se))
+[1] TRUE
+> if(!is.null(getOption("DEBUG"))){## uses 6194x6194 matrix
++ d <- predict(api.reg2, newdata=na.omit(apipop[,"enroll",drop=FALSE]))
++ all.equal(as.vector(coef(a)), as.vector(sum(coef(d))))
++ all.equal(as.vector(SE(a)), as.vector(sqrt(sum(vcov(d)))))
++ }
+>
+> proc.time()
+ user system elapsed
+ 0.239 0.026 0.272
diff --git a/tests/scoping.R b/tests/scoping.R
new file mode 100755
index 0000000..b8b7d65
--- /dev/null
+++ b/tests/scoping.R
@@ -0,0 +1,34 @@
+
+## regression test for testing regression
+
+library(survey)
+data(api)
+
+dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+
+
+f<-function(){
+ form<-acs.46~stype
+ svyglm(formula=form, design = dstrat)
+}
+
+g<-function(form){
+ svyglm(formula=form, design = dstrat)
+}
+f()
+g(acs.46~stype)
+
+f<-function(){
+ form<-Surv(acs.46)~stype
+ svycoxph(formula=form, design = dstrat)
+}
+
+g<-function(form){
+ svycoxph(formula=form, design = dstrat)
+}
+
+f()
+g(Surv(acs.46)~stype)
+
+## check coxph for a single predictor
+svycoxph(Surv(acs.46)~api00,design=dstrat)
diff --git a/tests/scoping.Rout.save b/tests/scoping.Rout.save
new file mode 100644
index 0000000..487c0e6
--- /dev/null
+++ b/tests/scoping.Rout.save
@@ -0,0 +1,122 @@
+
+R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
+Copyright (C) 2016 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.4.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+>
+> ## regression test for testing regression
+>
+> library(survey)
+Loading required package: grid
+Loading required package: Matrix
+Loading required package: survival
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> data(api)
+>
+> dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
+>
+>
+> f<-function(){
++ form<-acs.46~stype
++ svyglm(formula=form, design = dstrat)
++ }
+>
+> g<-function(form){
++ svyglm(formula=form, design = dstrat)
++ }
+> f()
+Stratified Independent Sampling design
+svydesign(id = ~1, strata = ~stype, weights = ~pw, data = apistrat,
+ fpc = ~fpc)
+
+Call: svyglm(formula = form, design = dstrat)
+
+Coefficients:
+(Intercept) stypeH stypeM
+ 28.7449 0.7551 0.4022
+
+Degrees of Freedom: 133 Total (i.e. Null); 129 Residual
+ (66 observations deleted due to missingness)
+Null Deviance: 1838
+Residual Deviance: 1835 AIC: 719.8
+> g(acs.46~stype)
+Stratified Independent Sampling design
+svydesign(id = ~1, strata = ~stype, weights = ~pw, data = apistrat,
+ fpc = ~fpc)
+
+Call: svyglm(formula = form, design = dstrat)
+
+Coefficients:
+(Intercept) stypeH stypeM
+ 28.7449 0.7551 0.4022
+
+Degrees of Freedom: 133 Total (i.e. Null); 129 Residual
+ (66 observations deleted due to missingness)
+Null Deviance: 1838
+Residual Deviance: 1835 AIC: 719.8
+>
+> f<-function(){
++ form<-Surv(acs.46)~stype
++ svycoxph(formula=form, design = dstrat)
++ }
+>
+> g<-function(form){
++ svycoxph(formula=form, design = dstrat)
++ }
+>
+> f()
+Call:
+svycoxph(formula = form, design = dstrat)
+
+ coef exp(coef) se(coef) z p
+stypeH -0.677 0.508 0.665 -1.02 0.31
+stypeM -0.251 0.778 0.209 -1.20 0.23
+
+Likelihood ratio test= on 2 df, p=
+n= 134, number of events= 134
+ (66 observations deleted due to missingness)
+> g(Surv(acs.46)~stype)
+Call:
+svycoxph(formula = form, design = dstrat)
+
+ coef exp(coef) se(coef) z p
+stypeH -0.677 0.508 0.665 -1.02 0.31
+stypeM -0.251 0.778 0.209 -1.20 0.23
+
+Likelihood ratio test= on 2 df, p=
+n= 134, number of events= 134
+ (66 observations deleted due to missingness)
+>
+> ## check coxph for a single predictor
+> svycoxph(Surv(acs.46)~api00,design=dstrat)
+Call:
+svycoxph(formula = Surv(acs.46) ~ api00, design = dstrat)
+
+ coef exp(coef) se(coef) z p
+api00 6.06e-06 1.00e+00 7.49e-04 0.01 0.99
+
+Likelihood ratio test= on 1 df, p=
+n= 134, number of events= 134
+ (66 observations deleted due to missingness)
+>
+> proc.time()
+ user system elapsed
+ 1.030 0.059 1.106
diff --git a/tests/survcurve.R b/tests/survcurve.R
new file mode 100644
index 0000000..3a95fe8
--- /dev/null
+++ b/tests/survcurve.R
@@ -0,0 +1,30 @@
+library(survey)
+library(survival)
+
+pbc2<-rbind(pbc,pbc)
+pbc2$id<-rep(1:418,2)
+
+dpbc1<-svydesign(id=~1, data=pbc)
+dpbc2<-svydesign(id=~id, data=pbc2)
+
+s1<-svykm(Surv(time,status>0)~1, subset(dpbc1, bili>6), se=TRUE)
+s2<-svykm(Surv(time,status>0)~1, subset(dpbc2, bili>6), se=TRUE)
+
+(c1<-confint(s1,(1:5)*365))
+(c2<-confint(s2,(1:5)*365))
+all.equal(c1, c2)
+
+m1<-svycoxph(Surv(time,status>0)~log(bili), design=dpbc1)
+m2<-svycoxph(Surv(time,status>0)~log(bili), design=dpbc2)
+
+d<-data.frame(bili=c(5,10))
+p1<-predict(m1, se=TRUE, newdata=d,type="curve")
+p2<-predict(m2, se=TRUE, newdata=d,type="curve")
+
+(pc1<-confint(p1[[1]],(1:5)*365))
+(pc2<-confint(p2[[1]],(1:5)*365))
+all.equal(pc1, pc2)
+
+(q1<-quantile(p1[[2]]))
+(q2<-quantile(p2[[2]]))
+all.equal(q1,q2)
diff --git a/tests/survcurve.Rout.save b/tests/survcurve.Rout.save
new file mode 100644
index 0000000..bdb68ac
--- /dev/null
+++ b/tests/survcurve.Rout.save
@@ -0,0 +1,96 @@
+
+R version 3.1.0 (2014-04-10) -- "Spring Dance"
+Copyright (C) 2014 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.1.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+> library(survival)
+Loading required package: splines
+>
+> pbc2<-rbind(pbc,pbc)
+> pbc2$id<-rep(1:418,2)
+>
+> dpbc1<-svydesign(id=~1, data=pbc)
+Warning message:
+In svydesign.default(id = ~1, data = pbc) :
+ No weights or probabilities supplied, assuming equal probability
+> dpbc2<-svydesign(id=~id, data=pbc2)
+Warning message:
+In svydesign.default(id = ~id, data = pbc2) :
+ No weights or probabilities supplied, assuming equal probability
+>
+> s1<-svykm(Surv(time,status>0)~1, subset(dpbc1, bili>6), se=TRUE)
+> s2<-svykm(Surv(time,status>0)~1, subset(dpbc2, bili>6), se=TRUE)
+>
+> (c1<-confint(s1,(1:5)*365))
+ 0.025 0.975
+365 0.6446215 0.8594153
+730 0.5410938 0.7766848
+1095 0.2683127 0.5103356
+1460 0.1444731 0.3722001
+1825 0.1009672 0.3204713
+> (c2<-confint(s2,(1:5)*365))
+ 0.025 0.975
+365 0.6446215 0.8594153
+730 0.5410938 0.7766848
+1095 0.2683127 0.5103356
+1460 0.1444731 0.3722001
+1825 0.1009672 0.3204713
+> all.equal(c1, c2)
+[1] TRUE
+>
+> m1<-svycoxph(Surv(time,status>0)~log(bili), design=dpbc1)
+> m2<-svycoxph(Surv(time,status>0)~log(bili), design=dpbc2)
+>
+> d<-data.frame(bili=c(5,10))
+> p1<-predict(m1, se=TRUE, newdata=d,type="curve")
+> p2<-predict(m2, se=TRUE, newdata=d,type="curve")
+>
+> (pc1<-confint(p1[[1]],(1:5)*365))
+ 0.025 0.975
+365 0.8410027 0.9266263
+730 0.7371114 0.8548312
+1095 0.5517779 0.7018583
+1460 0.4335073 0.5992819
+1825 0.3260899 0.5046241
+> (pc2<-confint(p2[[1]],(1:5)*365))
+ 0.025 0.975
+365 0.8409490 0.9267054
+730 0.7370152 0.8549432
+1095 0.5515848 0.7019513
+1460 0.4332252 0.5992968
+1825 0.3257172 0.5045795
+> all.equal(pc1, pc2)
+[1] "Mean relative difference: 0.0002070722"
+>
+> (q1<-quantile(p1[[2]]))
+0.75 0.5 0.25
+ 489 930 1492
+> (q2<-quantile(p2[[2]]))
+0.75 0.5 0.25
+ 489 930 1492
+> all.equal(q1,q2)
+[1] TRUE
+>
+> proc.time()
+ user system elapsed
+ 3.410 0.099 3.519
diff --git a/tests/twophase.R b/tests/twophase.R
new file mode 100644
index 0000000..94c0dcb
--- /dev/null
+++ b/tests/twophase.R
@@ -0,0 +1,133 @@
+library(survey)
+
+## two-phase simple random sampling.
+data(pbc, package="survival")
+pbc$id<-1:nrow(pbc)
+pbc$randomized<-with(pbc, !is.na(trt) & trt>-9)
+(d2pbc<-twophase(id=list(~id,~id), data=pbc, subset=~I(!randomized)))
+m<-svymean(~bili, d2pbc)
+all.equal(as.vector(coef(m)),with(pbc, mean(bili[!randomized])))
+all.equal(as.vector(SE(m)),
+ with(pbc, sd(bili[!randomized])/sqrt(sum(!randomized))),
+ tolerance=0.001)
+
+## two-stage sampling as two-phase
+data(mu284)
+ii<-with(mu284, c(1:15, rep(1:5,n2[1:5]-3)))
+mu284.1<-mu284[ii,]
+mu284.1$id<-1:nrow(mu284.1)
+mu284.1$sub<-rep(c(TRUE,FALSE),c(15,34-15))
+dmu284<-svydesign(id=~id1+id2,fpc=~n1+n2, data=mu284)
+## first phase cluster sample, second phase stratified within cluster
+(d2mu284<-twophase(id=list(~id1,~id),strata=list(NULL,~id1),
+ fpc=list(~n1,NULL),data=mu284.1,subset=~sub,method="approx"))
+(d22mu284<-twophase(id=list(~id1,~id),strata=list(NULL,~id1),
+ fpc=list(~n1,NULL),data=mu284.1,subset=~sub,method="full"))
+summary(d2mu284)
+t1<-svytotal(~y1, dmu284)
+t2<-svytotal(~y1, d2mu284)
+t22<-svytotal(~y1,d22mu284)
+m1<-svymean(~y1, dmu284)
+m2<-svymean(~y1, d2mu284)
+m22<-svymean(~y1, d22mu284)
+all.equal(coef(t1),coef(t2))
+all.equal(coef(t1),coef(t22))
+all.equal(coef(m1),coef(m2))
+all.equal(coef(m1),coef(m22))
+all.equal(as.vector(SE(m1)),as.vector(SE(m2)))
+all.equal(as.vector(SE(m1)),as.vector(SE(m22)))
+all.equal(as.vector(SE(t1)),as.vector(SE(t2)))
+all.equal(as.vector(SE(t1)),as.vector(SE(t22)))
+
+## case-cohort design
+##this example requires R 2.3.1 or later for cch and data.
+library("survival")
+data(nwtco, package="survival")
+## unstratified, equivalent to Lin & Ying (1993)
+print(dcchs<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
+ subset=~I(in.subcohort | rel), data=nwtco))
+cch1<-svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=dcchs)
+dcchs2<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
+ subset=~I(in.subcohort | rel), data=nwtco,method="approx")
+cch1.2<-svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=dcchs)
+all.equal(coef(cch1),coef(cch1.2))
+all.equal(SE(cch1),SE(cch1.2))
+## Using survival::cch
+subcoh <- nwtco$in.subcohort
+selccoh <- with(nwtco, rel==1|subcoh==1)
+ccoh.data <- nwtco[selccoh,]
+ccoh.data$subcohort <- subcoh[selccoh]
+cch2<-cch(Surv(edrel, rel) ~ factor(stage) + factor(histol) + I(age/12),
+ data =ccoh.data, subcoh = ~subcohort, id=~seqno,
+ cohort.size=4028, method="LinYing", robust=TRUE)
+
+print(all.equal(as.vector(coef(cch1)),as.vector(coef(cch2))))
+## cch has smaller variances by a factor of 1.0005 because
+## there is an (n/(n-1)) factor in the survey phase1 variance
+print(all.equal(as.vector(SE(cch1)), as.vector(SE(cch2)),tolerance=0.0006))
+
+
+## bug report from Takahiro Tsuchiya for version 3.4
+## We used to not match Sarndal exactly, because our old phase-one
+## estimator had a small bias for finite populations
+rei<-read.table(tmp<-textConnection(
+" id N n.a h n.ah n.h sub y
+1 1 300 20 1 12 5 TRUE 1
+2 2 300 20 1 12 5 TRUE 2
+3 3 300 20 1 12 5 TRUE 3
+4 4 300 20 1 12 5 TRUE 4
+5 5 300 20 1 12 5 TRUE 5
+6 6 300 20 1 12 5 FALSE NA
+7 7 300 20 1 12 5 FALSE NA
+8 8 300 20 1 12 5 FALSE NA
+9 9 300 20 1 12 5 FALSE NA
+10 10 300 20 1 12 5 FALSE NA
+11 11 300 20 1 12 5 FALSE NA
+12 12 300 20 1 12 5 FALSE NA
+13 13 300 20 2 8 3 TRUE 6
+14 14 300 20 2 8 3 TRUE 7
+15 15 300 20 2 8 3 TRUE 8
+16 16 300 20 2 8 3 FALSE NA
+17 17 300 20 2 8 3 FALSE NA
+18 18 300 20 2 8 3 FALSE NA
+19 19 300 20 2 8 3 FALSE NA
+20 20 300 20 2 8 3 FALSE NA
+"), header=TRUE)
+close(tmp)
+
+des.rei <- twophase(id=list(~id,~id), strata=list(NULL,~h),
+ fpc=list(~N,NULL), subset=~sub, data=rei, method="approx")
+tot<- svytotal(~y, des.rei)
+des.rei2 <- twophase(id=list(~id,~id), strata=list(NULL,~h),
+ fpc=list(~N,NULL), subset=~sub, data=rei)
+tot2<- svytotal(~y, des.rei2)
+
+## based on Sarndal et al (9.4.14)
+rei$w.ah <- rei$n.ah / rei$n.a
+a.rei <- aggregate(rei, by=list(rei$h), mean, na.rm=TRUE)
+a.rei$S.ysh <- tapply(rei$y, rei$h, var, na.rm=TRUE)
+a.rei$y.u <- sum(a.rei$w.ah * a.rei$y)
+V <- with(a.rei, sum(N * (N-1) * ((n.ah-1)/(n.a-1) - (n.h-1)/(N-1)) * w.ah * S.ysh / n.h))
+V <- V + with(a.rei, sum(N * (N-n.a) * w.ah * (y - y.u)^2 / (n.a-1)))
+
+a.rei$f.h<-with(a.rei, n.h/n.ah)
+Vphase2<-with(a.rei, sum(N*N*w.ah^2* ((1-f.h)/n.h) *S.ysh))
+
+a.rei$f<-with(a.rei, n.a/N)
+a.rei$delta.h<-with(a.rei, (1/n.h)*(n.a-n.ah)/(n.a-1))
+Vphase1<-with(a.rei, sum(N*N*((1-f)/n.a)*( w.ah*(1-delta.h)*S.ysh+ ((n.a)/(n.a-1))*w.ah*(y-y.u)^2)))
+
+V
+Vphase1
+Vphase2
+vcov(tot)
+vcov(tot2)
+## phase 2 identical
+all.equal(Vphase2,drop(attr(vcov(tot),"phases")$phase2))
+all.equal(Vphase2,drop(attr(vcov(tot2),"phases")$phase2))
+## phase 1 differs by 2.6% for old twophase estimator
+Vphase1/attr(vcov(tot),"phases")$phase1
+all.equal(Vphase1,as.vector(attr(vcov(tot2),"phases")$phase1))
+
diff --git a/tests/twophase.Rout.save b/tests/twophase.Rout.save
new file mode 100644
index 0000000..7ac7d2e
--- /dev/null
+++ b/tests/twophase.Rout.save
@@ -0,0 +1,271 @@
+
+R version 3.2.1 (2015-06-18) -- "World-Famous Astronaut"
+Copyright (C) 2015 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.4.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(survey)
+Loading required package: grid
+Loading required package: Matrix
+Loading required package: survival
+
+Attaching package: 'survey'
+
+The following object is masked from 'package:graphics':
+
+ dotchart
+
+>
+> ## two-phase simple random sampling.
+> data(pbc, package="survival")
+> pbc$id<-1:nrow(pbc)
+> pbc$randomized<-with(pbc, !is.na(trt) & trt>-9)
+> (d2pbc<-twophase(id=list(~id,~id), data=pbc, subset=~I(!randomized)))
+Two-phase sparse-matrix design:
+ twophase2(id = id, strata = strata, probs = probs, fpc = fpc,
+ subset = subset, data = data)
+Phase 1:
+Independent Sampling design (with replacement)
+svydesign(ids = ~id)
+Phase 2:
+Independent Sampling design
+svydesign(ids = ~id, fpc = `*phase1*`)
+> m<-svymean(~bili, d2pbc)
+> all.equal(as.vector(coef(m)),with(pbc, mean(bili[!randomized])))
+[1] TRUE
+> all.equal(as.vector(SE(m)),
++ with(pbc, sd(bili[!randomized])/sqrt(sum(!randomized))),
++ tolerance=0.001)
+[1] "Mean relative difference: 0.001198323"
+>
+> ## two-stage sampling as two-phase
+> data(mu284)
+> ii<-with(mu284, c(1:15, rep(1:5,n2[1:5]-3)))
+> mu284.1<-mu284[ii,]
+> mu284.1$id<-1:nrow(mu284.1)
+> mu284.1$sub<-rep(c(TRUE,FALSE),c(15,34-15))
+> dmu284<-svydesign(id=~id1+id2,fpc=~n1+n2, data=mu284)
+> ## first phase cluster sample, second phase stratified within cluster
+> (d2mu284<-twophase(id=list(~id1,~id),strata=list(NULL,~id1),
++ fpc=list(~n1,NULL),data=mu284.1,subset=~sub,method="approx"))
+Two-phase design: twophase(id = list(~id1, ~id), strata = list(NULL, ~id1), fpc = list(~n1,
+ NULL), data = mu284.1, subset = ~sub, method = "approx")
+Phase 1:
+1 - level Cluster Sampling design
+With (5) clusters.
+svydesign(ids = ~id1, fpc = ~n1)
+Phase 2:
+Stratified Independent Sampling design
+svydesign(ids = ~id, strata = ~id1, fpc = `*phase1*`)
+> (d22mu284<-twophase(id=list(~id1,~id),strata=list(NULL,~id1),
++ fpc=list(~n1,NULL),data=mu284.1,subset=~sub,method="full"))
+Two-phase sparse-matrix design:
+ twophase2(id = id, strata = strata, probs = probs, fpc = fpc,
+ subset = subset, data = data)
+Phase 1:
+1 - level Cluster Sampling design
+With (5) clusters.
+svydesign(ids = ~id1, fpc = ~n1)
+Phase 2:
+Stratified Independent Sampling design
+svydesign(ids = ~id, strata = ~id1, fpc = `*phase1*`)
+> summary(d2mu284)
+Two-phase design: twophase(id = list(~id1, ~id), strata = list(NULL, ~id1), fpc = list(~n1,
+ NULL), data = mu284.1, subset = ~sub, method = "approx")
+Phase 1:
+1 - level Cluster Sampling design
+With (5) clusters.
+svydesign(ids = ~id1, fpc = ~n1)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+ 0.1 0.1 0.1 0.1 0.1 0.1
+Population size (PSUs): 50
+Phase 2:
+Stratified Independent Sampling design
+svydesign(ids = ~id, strata = ~id1, fpc = `*phase1*`)
+Probabilities:
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
+ 0.3333 0.3750 0.4286 0.4674 0.6000 0.6000
+Stratum Sizes:
+ 19 31 45 47 50
+obs 3 3 3 3 3
+design.PSU 3 3 3 3 3
+actual.PSU 3 3 3 3 3
+Population stratum sizes (PSUs):
+19 31 45 47 50
+ 5 7 8 5 9
+Data variables:
+[1] "id1" "n1" "id2" "y1" "n2" "id" "sub"
+> t1<-svytotal(~y1, dmu284)
+> t2<-svytotal(~y1, d2mu284)
+> t22<-svytotal(~y1,d22mu284)
+> m1<-svymean(~y1, dmu284)
+> m2<-svymean(~y1, d2mu284)
+> m22<-svymean(~y1, d22mu284)
+> all.equal(coef(t1),coef(t2))
+[1] TRUE
+> all.equal(coef(t1),coef(t22))
+[1] TRUE
+> all.equal(coef(m1),coef(m2))
+[1] TRUE
+> all.equal(coef(m1),coef(m22))
+[1] TRUE
+> all.equal(as.vector(SE(m1)),as.vector(SE(m2)))
+[1] TRUE
+> all.equal(as.vector(SE(m1)),as.vector(SE(m22)))
+[1] TRUE
+> all.equal(as.vector(SE(t1)),as.vector(SE(t2)))
+[1] TRUE
+> all.equal(as.vector(SE(t1)),as.vector(SE(t22)))
+[1] TRUE
+>
+> ## case-cohort design
+> ##this example requires R 2.3.1 or later for cch and data.
+> library("survival")
+> data(nwtco, package="survival")
+> ## unstratified, equivalent to Lin & Ying (1993)
+> print(dcchs<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
++ subset=~I(in.subcohort | rel), data=nwtco))
+Two-phase sparse-matrix design:
+ twophase2(id = id, strata = strata, probs = probs, fpc = fpc,
+ subset = subset, data = data)
+Phase 1:
+Independent Sampling design (with replacement)
+svydesign(ids = ~seqno)
+Phase 2:
+Stratified Independent Sampling design
+svydesign(ids = ~seqno, strata = ~rel, fpc = `*phase1*`)
+> cch1<-svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
++ design=dcchs)
+> dcchs2<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
++ subset=~I(in.subcohort | rel), data=nwtco,method="approx")
+> cch1.2<-svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
++ design=dcchs)
+> all.equal(coef(cch1),coef(cch1.2))
+[1] TRUE
+> all.equal(SE(cch1),SE(cch1.2))
+[1] TRUE
+> ## Using survival::cch
+> subcoh <- nwtco$in.subcohort
+> selccoh <- with(nwtco, rel==1|subcoh==1)
+> ccoh.data <- nwtco[selccoh,]
+> ccoh.data$subcohort <- subcoh[selccoh]
+> cch2<-cch(Surv(edrel, rel) ~ factor(stage) + factor(histol) + I(age/12),
++ data =ccoh.data, subcoh = ~subcohort, id=~seqno,
++ cohort.size=4028, method="LinYing", robust=TRUE)
+>
+> print(all.equal(as.vector(coef(cch1)),as.vector(coef(cch2))))
+[1] TRUE
+> ## cch has smaller variances by a factor of 1.0005 because
+> ## there is an (n/(n-1)) factor in the survey phase1 variance
+> print(all.equal(as.vector(SE(cch1)), as.vector(SE(cch2)),tolerance=0.0006))
+[1] TRUE
+>
+>
+> ## bug report from Takahiro Tsuchiya for version 3.4
+> ## We used to not match Sarndal exactly, because our old phase-one
+> ## estimator had a small bias for finite populations
+> rei<-read.table(tmp<-textConnection(
++ " id N n.a h n.ah n.h sub y
++ 1 1 300 20 1 12 5 TRUE 1
++ 2 2 300 20 1 12 5 TRUE 2
++ 3 3 300 20 1 12 5 TRUE 3
++ 4 4 300 20 1 12 5 TRUE 4
++ 5 5 300 20 1 12 5 TRUE 5
++ 6 6 300 20 1 12 5 FALSE NA
++ 7 7 300 20 1 12 5 FALSE NA
++ 8 8 300 20 1 12 5 FALSE NA
++ 9 9 300 20 1 12 5 FALSE NA
++ 10 10 300 20 1 12 5 FALSE NA
++ 11 11 300 20 1 12 5 FALSE NA
++ 12 12 300 20 1 12 5 FALSE NA
++ 13 13 300 20 2 8 3 TRUE 6
++ 14 14 300 20 2 8 3 TRUE 7
++ 15 15 300 20 2 8 3 TRUE 8
++ 16 16 300 20 2 8 3 FALSE NA
++ 17 17 300 20 2 8 3 FALSE NA
++ 18 18 300 20 2 8 3 FALSE NA
++ 19 19 300 20 2 8 3 FALSE NA
++ 20 20 300 20 2 8 3 FALSE NA
++ "), header=TRUE)
+> close(tmp)
+>
+> des.rei <- twophase(id=list(~id,~id), strata=list(NULL,~h),
++ fpc=list(~N,NULL), subset=~sub, data=rei, method="approx")
+> tot<- svytotal(~y, des.rei)
+> des.rei2 <- twophase(id=list(~id,~id), strata=list(NULL,~h),
++ fpc=list(~N,NULL), subset=~sub, data=rei)
+> tot2<- svytotal(~y, des.rei2)
+>
+> ## based on Sarndal et al (9.4.14)
+> rei$w.ah <- rei$n.ah / rei$n.a
+> a.rei <- aggregate(rei, by=list(rei$h), mean, na.rm=TRUE)
+> a.rei$S.ysh <- tapply(rei$y, rei$h, var, na.rm=TRUE)
+> a.rei$y.u <- sum(a.rei$w.ah * a.rei$y)
+> V <- with(a.rei, sum(N * (N-1) * ((n.ah-1)/(n.a-1) - (n.h-1)/(N-1)) * w.ah * S.ysh / n.h))
+> V <- V + with(a.rei, sum(N * (N-n.a) * w.ah * (y - y.u)^2 / (n.a-1)))
+>
+> a.rei$f.h<-with(a.rei, n.h/n.ah)
+> Vphase2<-with(a.rei, sum(N*N*w.ah^2* ((1-f.h)/n.h) *S.ysh))
+>
+> a.rei$f<-with(a.rei, n.a/N)
+> a.rei$delta.h<-with(a.rei, (1/n.h)*(n.a-n.ah)/(n.a-1))
+> Vphase1<-with(a.rei, sum(N*N*((1-f)/n.a)*( w.ah*(1-delta.h)*S.ysh+ ((n.a)/(n.a-1))*w.ah*(y-y.u)^2)))
+>
+> V
+[1] 36522.63
+> Vphase1
+[1] 24072.63
+> Vphase2
+[1] 12450
+> vcov(tot)
+ y
+y 35911.05
+attr(,"phases")
+attr(,"phases")$phase1
+ [,1]
+[1,] 23461.05
+
+attr(,"phases")$phase2
+ y
+y 12450
+
+> vcov(tot2)
+ [,1]
+[1,] 36522.63
+attr(,"phases")
+attr(,"phases")$phase1
+ [,1]
+[1,] 24072.63
+
+attr(,"phases")$phase2
+ [,1]
+[1,] 12450
+
+> ## phase 2 identical
+> all.equal(Vphase2,drop(attr(vcov(tot),"phases")$phase2))
+[1] TRUE
+> all.equal(Vphase2,drop(attr(vcov(tot2),"phases")$phase2))
+[1] TRUE
+> ## phase 1 differs by 2.6% for old twophase estimator
+> Vphase1/attr(vcov(tot),"phases")$phase1
+ [,1]
+[1,] 1.026068
+> all.equal(Vphase1,as.vector(attr(vcov(tot2),"phases")$phase1))
+[1] TRUE
+>
+>
+> proc.time()
+ user system elapsed
+ 1.967 0.106 2.094
diff --git a/vignettes/domain.Rnw b/vignettes/domain.Rnw
new file mode 100644
index 0000000..46f3703
--- /dev/null
+++ b/vignettes/domain.Rnw
@@ -0,0 +1,111 @@
+\documentclass{article}
+\usepackage{url}
+%\VignetteIndexEntry{Estimates in subpopulations}
+\usepackage{Sweave}
+\author{Thomas Lumley}
+\title{Estimates in subpopulations.}
+
+\begin{document}
+\maketitle
+
+Estimating a mean or total in a subpopulation (domain) from a survey,
+e.g.\ the mean blood pressure in women, is not done simply by taking the
+subset of data in that subpopulation and pretending it is a new survey.
+This approach would give correct point estimates but incorrect standard
+errors.
+
+The standard way to derive domain means is as ratio estimators. I
+think it is easier to derive them as regression coefficients. These
+derivations are not important for R users, since subset operations on
+survey design objects automatically do the necessary adjustments, but
+they may be of interest. The various ways of constructing domain mean
+estimators are useful in quality control for the survey package, and
+some of the examples here are taken from
+\texttt{survey/tests/domain.R}.
+
+
+Suppose that in the artificial \texttt{fpc} data set we want to
+estimate the mean of \texttt{x} when \texttt{x>4}.
+<<>>=
+library(survey)
+data(fpc)
+dfpc<-svydesign(id=~psuid,strat=~stratid,weight=~weight,data=fpc,nest=TRUE)
+dsub<-subset(dfpc,x>4)
+svymean(~x,design=dsub)
+@
+
+The \texttt{subset} function constructs a survey design object with
+information about this subpopulation and \texttt{svymean} computes the
+mean. The same operation can be done for a set of subpopulations with
+\texttt{svyby}.
+<<>>=
+svyby(~x,~I(x>4),design=dfpc, svymean)
+@
+
+In a regression model with a binary covariate $Z$ and no intercept,
+there are two coefficients that estimate the mean of the outcome
+variable in the subpopulations with $Z=0$ and $Z=1$, so we can
+construct the domain mean estimator by regression.
+<<>>=
+summary(svyglm(x~I(x>4)+0,design=dfpc))
+@
+
+Finally, the classical derivation of the domain mean estimator is as
+a ratio, where the numerator is $X$ for observations in the domain
+and 0 otherwise, and the denominator is 1 for observations in the
+domain and 0 otherwise
+<<>>=
+svyratio(~I(x*(x>4)),~as.numeric(x>4), dfpc)
+@
+
+The estimator is implemented by setting the sampling weight to zero
+for observations not in the domain. For most survey design objects
+this allows a reduction in memory use, since only the number of zero
+weights in each sampling unit needs to be kept. For more complicated
+survey designs, such as post-stratified designs, all the data are kept
+and there is no reduction in memory use.
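+
+As a minimal sketch of the zero-weight idea (illustrative only, not
+the package internals; \texttt{weights()} extracts the sampling
+weights from the design defined above):
+<<>>=
+w <- weights(dfpc)
+w[fpc$x <= 4] <- 0
+sum(w*fpc$x)/sum(w)  ## matches the domain mean point estimate
+@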
+
+
+\subsection*{More complex examples}
+Verifying that \texttt{svymean} agrees with the ratio and regression
+derivations is particularly useful for more complicated designs where
+published examples are less readily available.
+
+This example shows calibration (GREG) estimators of domain means for
+the California Academic Performance Index (API).
+<<>>=
+data(api)
+dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
+pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
+gclus1 <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069))
+
+svymean(~api00, subset(gclus1, comp.imp=="Yes"))
+svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), gclus1)
+summary(svyglm(api00~comp.imp-1, gclus1))
+@
+
+Two-stage samples with full finite-population corrections
+<<>>=
+data(mu284)
+dmu284<-svydesign(id=~id1+id2,fpc=~n1+n2, data=mu284)
+
+svymean(~y1, subset(dmu284,y1>40))
+svyratio(~I(y1*(y1>40)),~as.numeric(y1>40),dmu284)
+summary(svyglm(y1~I(y1>40)+0,dmu284))
+@
+
+Stratified two-phase sampling of children with Wilms tumour,
+estimating relapse probability for those older than 3 years (36
+months) at diagnosis
+<<>>=
+library("survival")
+data(nwtco)
+nwtco$incc2<-as.logical(with(nwtco, ifelse(rel | instit==2,1,rbinom(nrow(nwtco),1,.1))))
+dccs8<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~interaction(rel,stage,instit)),
+ data=nwtco, subset=~incc2)
+svymean(~rel, subset(dccs8,age>36))
+svyratio(~I(rel*as.numeric(age>36)), ~as.numeric(age>36), dccs8)
+summary(svyglm(rel~I(age>36)+0, dccs8))
+@
+
+\end{document}
diff --git a/vignettes/epi.Rnw b/vignettes/epi.Rnw
new file mode 100644
index 0000000..ee2d7de
--- /dev/null
+++ b/vignettes/epi.Rnw
@@ -0,0 +1,250 @@
+\documentclass{article}
+\usepackage{url}
+\addtolength{\topmargin}{-0.5in}
+\addtolength{\textheight}{0.75in}
+\addtolength{\oddsidemargin}{-0.5in}
+\addtolength{\textwidth}{1in}
+%\VignetteIndexEntry{Two-phase designs in epidemiology}
+\usepackage{Sweave}
+\author{Thomas Lumley}
+\title{Two-phase designs in epidemiology}
+
+\begin{document}
+\maketitle
+This document explains how to analyse case--cohort and two-phase
+case--control studies with the ``survey'' package, using examples from
+\url{http://faculty.washington.edu/norm/software.html}. Some of the
+examples were published by Breslow \& Chatterjee (1999).
+
+The data are relapse rates from the National Wilms Tumor
+Study (NWTS). Wilms tumour is a rare cancer of the kidney in
+children. Intensive treatment cures the majority of cases, but
+prognosis is poor when the disease is advanced at diagnosis and for
+some histological subtypes. The histological characterisation of the
+tumour is difficult, and histological group as determined by the NWTS
+central pathologist predicts outcome much better than determinations
+by local institution pathologists. In fact, local institution histology can be
+regarded statistically as a pure surrogate for the central lab
+histology.
+
+In these examples we will pretend that the (binary) local institution
+histology determination (\texttt{instit}) is available for all
+children in the study and that the central lab histology
+(\texttt{histol}) is obtained for a probability sample of specimens in
+a two-phase design. We treat the initial sampling of the study as
+simple random sampling from an infinite superpopulation. We also have
+data on disease stage, a four-level variable; on relapse; and on time
+to relapse.
+
+\section*{Case--control designs}
+
+Breslow \& Chatterjee (1999) use the NWTS data to illustrate two-phase
+case--control designs. The data are available at
+\url{http://faculty.washington.edu/norm/software.html} in compressed
+form; we first expand to one record per patient.
+<<>>=
+library(survey)
+load(system.file("doc","nwts.rda",package="survey"))
+nwtsnb<-nwts
+nwtsnb$case<-nwts$case-nwtsb$case
+nwtsnb$control<-nwts$control-nwtsb$control
+
+a<-rbind(nwtsb,nwtsnb)
+a$in.ccs<-rep(c(TRUE,FALSE),each=16)
+
+b<-rbind(a,a)
+b$rel<-rep(c(1,0),each=32)
+b$n<-ifelse(b$rel,b$case,b$control)
+index<-rep(1:64,b$n)
+
+nwt.exp<-b[index,c(1:3,6,7)]
+nwt.exp$id<-1:4088
+@
+
+As we actually do know \texttt{histol} for all patients, we can fit the logistic regression model to the complete data, to compare with the two-phase analyses
+<<>>=
+glm(rel~factor(stage)*factor(histol), family=binomial, data=nwt.exp)
+@
+
+The second phase sample consists of all patients with unfavorable
+histology as determined by local institution pathologists, all cases,
+and a 20\% sample of the remainder. Phase two is thus a stratified
+random sample without replacement, with strata defined by the
+interaction of \texttt{instit} and \texttt{rel}.
+
+<<>>=
+dccs2<-twophase(id=list(~id,~id),subset=~in.ccs,
+ strata=list(NULL,~interaction(instit,rel)),data=nwt.exp)
+
+summary(svyglm(rel~factor(stage)*factor(histol),family=binomial,design=dccs2))
+@
+
+Disease stage at the time of surgery is also recorded. It could be
+used to further stratify the sampling, or, as in this example, to
+post-stratify. We can analyze the data either pretending that the
+sampling was stratified or using \texttt{calibrate} to post-stratify
+the design.
+
+<<>>=
+dccs8<-twophase(id=list(~id,~id),subset=~in.ccs,
+ strata=list(NULL,~interaction(instit,stage,rel)),data=nwt.exp)
+gccs8<-calibrate(dccs2,phase=2,formula=~interaction(instit,stage,rel))
+
+summary(svyglm(rel~factor(stage)*factor(histol),family=binomial,design=dccs8))
+summary(svyglm(rel~factor(stage)*factor(histol),family=binomial,design=gccs8))
+@
+
+
+\section*{Case--cohort designs}
+In the case--cohort design for survival analysis, a $P$\% sample of a cohort
+is taken at recruitment for the second phase, and all participants who
+experience the event (cases) are later added to the phase-two sample.
+
+Viewing the sampling design as progressing through time in this way,
+as originally proposed, gives a double sampling design at phase two.
+It is simpler to view the process \emph{sub specie aeternitatis}, and
+to note that cases are sampled with probability 1, and controls with
+probability $P/100$. The subcohort will often be determined
+retrospectively rather than at recruitment, giving stratified random
+sampling without replacement, stratified on case status. If the
+subcohort is determined prospectively we can use the same analysis,
+post-stratifying rather than stratifying.
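+
+A small sketch of the implied phase-two inclusion probabilities (not
+run, since \texttt{nwtco} is only loaded below; \texttt{twophase}
+performs the equivalent computation internally):
+<<eval=FALSE>>=
+## cases are sampled with probability 1; controls at the subcohort rate
+p2 <- with(nwtco, ifelse(rel==1, 1, mean(in.subcohort[rel==0])))
+table(round(1/p2, 1))  ## phase-two weights for cases and controls
+@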
+
+There have been many analyses proposed for the case--cohort design
+(Therneau \& Li, 1999). We consider only those that can be expressed as a
+Horvitz--Thompson estimator for the Cox model.
+
+
+
+First we load the data and the necessary packages. The version of the
+NWTS data that includes survival times is not identical to the data
+set used for case--control analyses above.
+<<>>=
+library(survey)
+library(survival)
+data(nwtco)
+nwtco<-subset(nwtco, !is.na(edrel))
+@
+
+Again, we fit a model that uses \texttt{histol} for all patients, to compare with the two-phase design
+<<>>=
+coxph(Surv(edrel, rel)~factor(stage)+factor(histol)+I(age/12),data=nwtco)
+@
+
+We define a two-phase survey design using simple random
+superpopulation sampling for the first phase, and sampling without
+replacement stratified on \texttt{rel} for the second phase. The
+\texttt{subset} argument specifies that observations are in the phase-two sample if they are in the subcohort or are cases. As before, the data structure is rectangular, but variables measured at phase two may be \texttt{NA} for participants not included at phase two.
+
+We compare the result to that given by \texttt{survival::cch} for Lin
+\& Ying's (1993) approach to the case--cohort design.
+
+
+<<>>=
+(dcch<-twophase(id=list(~seqno,~seqno), strata=list(NULL,~rel),
+ subset=~I(in.subcohort | rel), data=nwtco))
+svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=dcch)
+
+subcoh <- nwtco$in.subcohort
+selccoh <- with(nwtco, rel==1|subcoh==1)
+ccoh.data <- nwtco[selccoh,]
+ccoh.data$subcohort <- subcoh[selccoh]
+cch(Surv(edrel, rel) ~ factor(stage) + factor(histol) + I(age/12),
+ data =ccoh.data, subcoh = ~subcohort, id=~seqno,
+ cohort.size=4028, method="LinYing")
+@
+
+
+Barlow (1994) proposes an analysis that ignores the finite population
+correction at the second phase. This simplifies the standard error
+estimation, as the design can be expressed as one-phase stratified
+superpopulation sampling. The standard errors will be somewhat
+conservative. More data preparation is needed for this analysis as the
+weights change over time.
+<<>>=
+nwtco$eventrec<-rep(0,nrow(nwtco))
+nwtco.extra<-subset(nwtco, rel==1)
+nwtco.extra$eventrec<-1
+nwtco.expd<-rbind(subset(nwtco,in.subcohort==1),nwtco.extra)
+nwtco.expd$stop<-with(nwtco.expd,
+ ifelse(rel & !eventrec, edrel-0.001,edrel))
+nwtco.expd$start<-with(nwtco.expd,
+ ifelse(rel & eventrec, edrel-0.001, 0))
+nwtco.expd$event<-with(nwtco.expd,
+ ifelse(rel & eventrec, 1, 0))
+nwtco.expd$pwts<-ifelse(nwtco.expd$event, 1, 1/with(nwtco,mean(in.subcohort | rel)))
+@
+
+The analysis corresponds to a cluster-sampled design in which
+individuals are sampled stratified by subcohort membership and then
+time periods are sampled stratified by event status. Having
+individual as the primary sampling unit is necessary for correct
+standard error calculation.
+
+<<>>=
+(dBarlow<-svydesign(id=~seqno+eventrec, strata=~in.subcohort+rel,
+ data=nwtco.expd, weight=~pwts))
+svycoxph(Surv(start,stop,event)~factor(stage)+factor(histol)+I(age/12),
+ design=dBarlow)
+@
+
+In fact, as the finite population correction is not being used, the second stage of the cluster sampling could be ignored. We can also produce the stratified bootstrap standard errors of Wacholder et al (1989), using a replicate-weights analysis
+
+<<>>=
+(dWacholder <- as.svrepdesign(dBarlow,type="bootstrap",replicates=500))
+svycoxph(Surv(start,stop,event)~factor(stage)+factor(histol)+I(age/12),
+ design=dWacholder)
+@
+
+
+\subsection*{Exposure-stratified designs}
+
+
+Borgan et al (2000) propose designs stratified or post-stratified on
+phase-one variables. The examples at
+\url{http://faculty.washington.edu/norm/software.html} use a different
+subcohort sample for this stratified design, so we load the new
+\texttt{subcohort} variable
+<<>>=
+load(system.file("doc","nwtco-subcohort.rda",package="survey"))
+nwtco$subcohort<-subcohort
+
+d_BorganII <- twophase(id=list(~seqno,~seqno),
+ strata=list(NULL,~interaction(instit,rel)),
+ data=nwtco, subset=~I(rel |subcohort))
+(b2<-svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=d_BorganII))
+@
+
+
+We can further post-stratify the design on disease stage and age with \texttt{calibrate}
+<<>>=
+d_BorganIIps <- calibrate(d_BorganII, phase=2, formula=~age+interaction(instit,rel,stage))
+svycoxph(Surv(edrel,rel)~factor(stage)+factor(histol)+I(age/12),
+ design=d_BorganIIps)
+@
+
+
+\section*{References}
+
+Barlow WE (1994). Robust variance estimation for the case-cohort
+design. \emph{Biometrics} 50:1064-1072.
+
+Borgan \O, Langholz B, Samuelsen SO, Goldstein L and Pogoda J (2000).
+Exposure stratified case-cohort designs. \emph{Lifetime Data
+Analysis} 6:39-58.
+
+Breslow NE and Chatterjee N (1999). Design and analysis of two-phase
+studies with binary outcome applied to Wilms tumour prognosis.
+\emph{Applied Statistics} 48:457-468.
+
+Lin DY and Ying Z (1993). Cox regression with incomplete covariate
+measurements. \emph{Journal of the American Statistical Association}
+88:1341-1349.
+
+Therneau TM and Li H (1999). Computing the Cox model for case-cohort
+designs. \emph{Lifetime Data Analysis} 5:99-112.
+
+Wacholder S, Gail MH, Pee D and Brookmeyer R (1989). Alternate
+variance and efficiency calculations for the case-cohort design.
+\emph{Biometrika} 76:117-123.
+\end{document}
diff --git a/vignettes/phase1.Rnw b/vignettes/phase1.Rnw
new file mode 100644
index 0000000..7e20f38
--- /dev/null
+++ b/vignettes/phase1.Rnw
@@ -0,0 +1,134 @@
+\documentclass{article}
+\usepackage{url}
+\addtolength{\topmargin}{-0.5in}
+\addtolength{\textheight}{0.75in}
+\addtolength{\oddsidemargin}{-0.5in}
+\addtolength{\textwidth}{1in}
+%\VignetteIndexEntry{Obsolete formulas for two-phase variances}
+\usepackage{Sweave}
+\author{Thomas Lumley}
+\title{Obsolete formulas for two-phase variances}
+
+\begin{document}
+\maketitle
+This document explains the computation of variances for totals in
+two-phase designs before version 3.15, or using \texttt{method="approx"}. Since version 3.15 the variances are computed directly using a sparse-matrix representation of the covariance of sampling indicators, and agree exactly with the formulas in Section 9.3 of S\"arndal, Swensson, and Wretman.
+Variances for other statistics are computed by the
+delta-method from the variance of the total of the estimating
+functions.
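+
+For example, a ratio $\hat R=\hat Y/\hat X$ solves the estimating
+equation $\sum_i (y_i-Rx_i)/\pi_i=0$, and the delta-method gives the
+standard linearisation (stated here for concreteness)
+$$\widehat{\textrm{var}}[\hat R]\approx
+\frac{1}{\hat X^2}\,\widehat{\textrm{var}}\left[\sum_i \frac{y_i-\hat R x_i}{\pi_i}\right].$$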
+
+The variance formulas come from conditioning on the sample selected in
+the first phase
+$$\textrm{var}[\hat T]=E\left[\textrm{var}\left[\hat T|\textrm{phase 1}\right]\right]+\textrm{var}\left[E\left[\hat T|\textrm{phase 1}\right]\right]$$
+
+The first term is estimated by the variance of $\hat T$ considering
+the phase one sample as the fixed population, and so uses the same
+computations as any single-phase design. The second term is the
+variance of $\hat T$ if complete data were available for the phase-one
+sample. This takes a little more work.
+
+
+The variance computations for a stratified, clustered, multistage
+design involve recursively computing a within-stratum variance for the
+total over sampling units at the next stage. That is, we want to
+compute
+$$s^2=\frac{1}{n-1}\sum_{i=1}^n (X_i-\bar X)^2$$
+where $X_i$ are $\pi$-expanded observations, perhaps summed over sampling units.
+A natural estimator of $s^2$ when only some observations are present in the phase-two sample is
+$$\hat s^2=\frac{1}{n-1}\sum_{i=1}^n \frac{R_i}{\pi_i} (X_i-\hat{\bar X})^2$$
+where $\pi_i$ is the probability that $X_i$ is available and $R_i$ is the indicator that $X_i$ is available. We also need an estimator for $\bar X$, and a natural one is
+$$\hat{\bar X}=\frac{1}{n}\sum_{i=1}^n \frac{R_i}{\pi_i}X_i$$
+
+This is not an unbiased estimator of $s^2$ unless $\hat{\bar X}=\bar X$,
+but the bias is of order $O(n_2^{-1})$ where $n_2=\sum_i R_i$ is the
+number of phase-two observations.
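+
+A minimal numeric sketch of these formulas (hypothetical data, with a
+constant availability probability $\pi_i=0.5$ for illustration):
+<<>>=
+set.seed(42)
+X <- rnorm(20); pi <- rep(0.5, 20)
+R <- rbinom(20, 1, pi)
+Xbar.hat <- sum((R/pi)*X)/20
+s2.hat <- sum((R/pi)*(X - Xbar.hat)^2)/(20 - 1)
+c(var(X), s2.hat)  ## full-data variance and its phase-two estimate
+@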
+
+If the phase-one design involves only a single stage of sampling then
+$X_i$ is $Y_i/p_i$, where $Y_i$ is the observed value and $p_i$ is the
+phase-one sampling probability. For multistage phase-one designs (not
+yet implemented) $X_i$ will be more complicated, but still feasible to
+automate.
+
+This example shows the unbiased phase-one estimate (from Takahiro
+Tsuchiya) and the estimate I use, in a situation where the phase-two
+sample is quite small.
+
+First we read the data
+\begin{verbatim}
+rei<-read.table(textConnection(
+" id N n.a h n.ah n.h sub y
+1 1 300 20 1 12 5 TRUE 1
+2 2 300 20 1 12 5 TRUE 2
+3 3 300 20 1 12 5 TRUE 3
+4 4 300 20 1 12 5 TRUE 4
+5 5 300 20 1 12 5 TRUE 5
+6 6 300 20 1 12 5 FALSE NA
+7 7 300 20 1 12 5 FALSE NA
+8 8 300 20 1 12 5 FALSE NA
+9 9 300 20 1 12 5 FALSE NA
+10 10 300 20 1 12 5 FALSE NA
+11 11 300 20 1 12 5 FALSE NA
+12 12 300 20 1 12 5 FALSE NA
+13 13 300 20 2 8 3 TRUE 6
+14 14 300 20 2 8 3 TRUE 7
+15 15 300 20 2 8 3 TRUE 8
+16 16 300 20 2 8 3 FALSE NA
+17 17 300 20 2 8 3 FALSE NA
+18 18 300 20 2 8 3 FALSE NA
+19 19 300 20 2 8 3 FALSE NA
+20 20 300 20 2 8 3 FALSE NA
+"), header=TRUE)
+\end{verbatim}
+<<echo=FALSE>>=
+rei<-read.table(textConnection(
+" id N n.a h n.ah n.h sub y
+1 1 300 20 1 12 5 TRUE 1
+2 2 300 20 1 12 5 TRUE 2
+3 3 300 20 1 12 5 TRUE 3
+4 4 300 20 1 12 5 TRUE 4
+5 5 300 20 1 12 5 TRUE 5
+6 6 300 20 1 12 5 FALSE NA
+7 7 300 20 1 12 5 FALSE NA
+8 8 300 20 1 12 5 FALSE NA
+9 9 300 20 1 12 5 FALSE NA
+10 10 300 20 1 12 5 FALSE NA
+11 11 300 20 1 12 5 FALSE NA
+12 12 300 20 1 12 5 FALSE NA
+13 13 300 20 2 8 3 TRUE 6
+14 14 300 20 2 8 3 TRUE 7
+15 15 300 20 2 8 3 TRUE 8
+16 16 300 20 2 8 3 FALSE NA
+17 17 300 20 2 8 3 FALSE NA
+18 18 300 20 2 8 3 FALSE NA
+19 19 300 20 2 8 3 FALSE NA
+20 20 300 20 2 8 3 FALSE NA
+"), header=TRUE)
+@
+
+Now, construct a two-phase design object and compute the total of \verb=y=
+<<>>=
+library(survey)
+des.rei <- twophase(id=list(~id,~id), strata=list(NULL,~h),
+ fpc=list(~N,NULL), subset=~sub, data=rei)
+tot<- svytotal(~y, des.rei)
+@
+
+The unbiased estimator is given by equation 9.4.14 of S\"arndal, Swensson, and Wretman.
+<<>>=
+rei$w.ah <- rei$n.ah / rei$n.a
+a.rei <- aggregate(rei, by=list(rei$h), mean, na.rm=TRUE)
+a.rei$S.ysh <- tapply(rei$y, rei$h, var, na.rm=TRUE)
+a.rei$y.u <- sum(a.rei$w.ah * a.rei$y)
+a.rei$f<-with(a.rei, n.a/N)
+a.rei$delta.h<-with(a.rei, (1/n.h)*(n.a-n.ah)/(n.a-1))
+Vphase1<-with(a.rei, sum(N*N*((1-f)/n.a)*( w.ah*(1-delta.h)*S.ysh+ ((n.a)/(n.a-1))*w.ah*(y-y.u)^2)))
+@
+
+The phase-two contributions (not shown) are identical. The phase-one contributions are quite close
+<<>>=
+Vphase1
+attr(vcov(tot),"phases")$phase1
+@
+
+
+\end{document}
diff --git a/vignettes/pps.Rnw b/vignettes/pps.Rnw
new file mode 100644
index 0000000..7f9867c
--- /dev/null
+++ b/vignettes/pps.Rnw
@@ -0,0 +1,103 @@
+\documentclass{article}
+\usepackage{url}
+\addtolength{\topmargin}{-0.5in}
+\addtolength{\textheight}{0.75in}
+\addtolength{\oddsidemargin}{-0.5in}
+\addtolength{\textwidth}{1in}
+%\VignetteIndexEntry{Analysing PPS designs}
+\usepackage{Sweave}
+\author{Thomas Lumley}
+\title{Describing PPS designs to R}
+
+\begin{document}
+\maketitle
+
+The survey package has always supported PPS (i.e., arbitrary unequal probability) sampling with replacement, or using the with-replacement single-stage approximation to a multistage design. No special notation is required: just specify the correct sampling weights.
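+
+For example (a sketch only: \texttt{mydata} and its weight column
+\texttt{w} are hypothetical, so the chunk is not run):
+<<eval=FALSE>>=
+## PPS with replacement: the weights carry all the design information
+dppswr_sketch <- svydesign(id=~1, weights=~w, data=mydata)
+@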
+
+Version 3.11 added another approximation for PPS sampling without replacement, and version 3.16 added more support. There are two broad classes of estimators for PPS sampling without replacement: approximations to the Horvitz--Thompson and Yates--Grundy estimators based on approximating the pairwise sampling probabilities, and estimators of H\'ajek type that attempt to recover the extra precision of a without-replacement design by conditioning on the estimated population size.
+
+\subsection*{Direct approximations}
+Using the standard recursive algorithm for stratified multistage
+sampling when one or more stages are actually PPS gives an
+approximation due to Brewer. This is simple to compute, always
+non-negative, and appears to be fairly efficient.
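+
+As a sketch, a two-stage specification of this kind might look as
+follows (the data frame \texttt{mydata} and its columns are
+hypothetical, so the chunk is not run; \texttt{fpc} supplies the
+stage-wise sampling probabilities):
+<<eval=FALSE>>=
+dbrewer <- svydesign(id=~psu+ssu, fpc=~prob1+prob2,
+                     data=mydata, pps="brewer")
+@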
+
+
+
+\subsection*{Approximating $\pi_{ij}$}
+Given the pairwise sampling probabilities $\pi_{ij}$ we can define the weighted covariance of sampling indicators
+$$\check{\Delta}_{ij} = 1-\frac{\pi_i\pi_j}{\pi_{ij}}$$
+ and the weighted observations
+ $$\check{x}_i=\frac{1}{\pi_i}x_i.$$
+
+Two unbiased estimators of the variance of the total of $x$ are the Horvitz--Thompson estimator
+$$\hat V_{HT}= \sum_{i,j=1}^n \check{\Delta}_{ij}\check{x}_i\check{x}_j$$
+and the Yates--Grundy(--Sen) estimator
+$$\hat V_{YG}= -\frac{1}{2}\sum_{i,j=1}^n \check{\Delta}_{ij}(\check{x}_i-\check{x}_j)^2.$$
+The Yates--Grundy estimator appears to be preferred in most comparisons. It is always non-negative (up to rounding error, at least).
+
+In principle, $\pi_{ij}$ might not be available and various approximations have been proposed. The (truncated) Hartley--Rao approximation is
+$$\check{\Delta}_{ij}=1-\frac{n-\pi_i-\pi_j+\sum_{k=1}^N\pi^2_k/n}{n-1}$$
+which requires knowing $\pi_i$ for all units in the population. The population sum can be estimated from the sample, giving a further approximation
+$$\check{\Delta}_{ij}=1-\frac{n-\pi_i-\pi_j+\sum_{k=1}^n\pi_k/n}{n-1}$$
+that requires only the sample $\pi_i$. Overton's approximation is
+$$\check{\Delta}_{ij}=1-\frac{n-(\pi_i+\pi_j)/2}{n-1}$$
+which also requires only the sample $\pi_i$.
+
+In practice, given modern computing power, $\pi_{ij}$ should be available either explicitly or by simulation, so the Hartley--Rao and Overton approximations are not particularly useful.
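+
+As a minimal sketch of one of these approximations, Overton's
+$\check{\Delta}$ matrix can be computed directly from the sample
+$\pi_i$ (illustrative only; the package computes this internally):
+<<>>=
+overton_delta <- function(pi) {
+  n <- length(pi)
+  1 - (n - outer(pi, pi, "+")/2)/(n - 1)
+}
+round(overton_delta(c(0.1, 0.2, 0.4)), 3)
+@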
+
+\subsection*{Using the PPS estimators}
+At the moment, only Brewer's approximation can be used as a component of multistage sampling, though for any sampling design it is possible to work out the joint sampling probabilities and use the other estimators. Those estimators can be used for cluster sampling or for sampling of individual units. This is likely to change in the future.
+
+To specify a PPS design, the sampling probabilities must be given in the \texttt{probs} argument of \texttt{svydesign}, or in the \texttt{fpc} argument, with \texttt{probs} and \texttt{weights} unspecified. In addition, it is necessary to specify which PPS computation should be used, with the \texttt{pps} argument. The optional \texttt{variance} argument specifies the Horvitz--Thompson (\texttt{variance="HT"}) or Yates--Grundy (\texttt{variance="YG"}) estimator, with the default being \texttt{variance="HT"}.
+
+Some estimators require information in addition to the sampling probabilities for units in the sample. This information is supplied to the \texttt{pps=} argument of \texttt{svydesign} using wrapper functions that create objects with appropriate classes. To specify the population sum $\sum \pi_i^2/n$ needed for the Hartley--Rao approximation, use \texttt{HR()}, and to specify a matrix of pairwise sampling probabilities use \texttt{ppsmat()}. The function \texttt{HR()} without an argument requests the sample-based estimate of this sum described above.
+
+The data set \texttt{election} contains county-level voting data from the 2004 US presidential elections, with a PPS sample of size 40 taken using Till\'e's splitting method, from the \texttt{sampling} package. The sampling probabilities vary widely, with Los Angeles County having a probability of 0.9 and many small counties having probabilities less than 0.0005.
+<<>>=
+library(survey)
+data(election)
+summary(election$p)
+summary(election_pps$p)
+@
+
+Some possible survey design specifications for these data are:
+<<>>=
+## Hajek type
+dpps_br<- svydesign(id=~1, fpc=~p, data=election_pps, pps="brewer")
+## Horvitz-Thompson type
+dpps_ov<- svydesign(id=~1, fpc=~p, data=election_pps, pps="overton")
+dpps_hr<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40))
+dpps_hr1<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR())
+dpps_ht<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob))
+## Yates-Grundy type
+dpps_yg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=ppsmat(election_jointprob),variance="YG")
+dpps_hryg<- svydesign(id=~1, fpc=~p, data=election_pps, pps=HR(sum(election$p^2)/40),variance="YG")
+## The with-replacement approximation
+dppswr <-svydesign(id=~1, probs=~p, data=election_pps)
+@
+
+All the without-replacement design objects except for Brewer's method include a matrix $\check{\Delta}$. These can be visualized with the \texttt{image()} method. These plots use the \texttt{lattice} package and so need \texttt{show()} to display them inside a program:
+<<fig=TRUE>>=
+show(image(dpps_ht))
+@
+<<fig=TRUE>>=
+show(image(dpps_ov))
+@
+In this example there are more negative entries in $\check{\Delta}$ with the approximate methods than when the full pairwise sampling matrix is supplied.
+
+The estimated totals are the same with all the methods, but the standard errors are not.
+
+<<>>=
+svytotal(~Bush+Kerry+Nader, dpps_ht)
+svytotal(~Bush+Kerry+Nader, dpps_yg)
+svytotal(~Bush+Kerry+Nader, dpps_hr)
+svytotal(~Bush+Kerry+Nader, dpps_hryg)
+svytotal(~Bush+Kerry+Nader, dpps_hr1)
+svytotal(~Bush+Kerry+Nader, dpps_br)
+svytotal(~Bush+Kerry+Nader, dpps_ov)
+svytotal(~Bush+Kerry+Nader, dppswr)
+@
+
+
+\end{document}
diff --git a/vignettes/survey.Rnw b/vignettes/survey.Rnw
new file mode 100644
index 0000000..14e3a5a
--- /dev/null
+++ b/vignettes/survey.Rnw
@@ -0,0 +1,100 @@
+\documentclass{article}
+\usepackage{url}
+%\VignetteIndexEntry{A survey analysis example}
+\usepackage{Sweave}
+\author{Thomas Lumley}
+\title{A survey analysis example}
+
+\begin{document}
+\maketitle
+
+This document provides a simple example analysis of a survey data set,
+a subsample from the California Academic Performance Index, an annual
+set of tests used to evaluate California schools. The API website,
+including the original data files, is at
+\url{http://api.cde.ca.gov}. The subsample was generated as a teaching
+example by Academic Technology Services at UCLA and was obtained from
+\url{http://www.ats.ucla.edu/stat/stata/Library/svy_survey.htm}.
+
+
+We have a cluster sample in which 15 school districts were sampled and
+then all schools in each district. This is in the data frame
+\texttt{apiclus1}, loaded with \texttt{data(api)}. The cluster sample is
+defined by the sampling unit (\texttt{dnum}) and the population
+size (\texttt{fpc}). Sampling weights are computed from the population
+sizes, but could be provided separately.
+<<>>=
+library(survey)
+data(api)
+dclus1 <- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
+@
+
+The \texttt{svydesign} function returns an object containing the survey data and metadata.
+<<>>=
+summary(dclus1)
+@
+
+We can compute summary statistics to estimate the mean, median, and
+quartiles of the Academic Performance Index in the year 2000, the
+number of elementary, middle, and high schools in the state, the total
+number of students, and the proportion who took the test. Each
+function takes a formula object describing the variables and a survey
+design object containing the data.
+<<>>=
+svymean(~api00, dclus1)
+svyquantile(~api00, dclus1, quantile=c(0.25,0.5,0.75), ci=TRUE)
+svytotal(~stype, dclus1)
+svytotal(~enroll, dclus1)
+svyratio(~api.stu,~enroll, dclus1)
+@
+
+The ordinary R subsetting functions \verb'[' and \texttt{subset} work
+correctly on these survey objects, carrying along the metadata needed
+for valid standard errors. Here we compute the proportion of high
+school students who took the test
+<<>>=
+svyratio(~api.stu, ~enroll, design=subset(dclus1, stype=="H"))
+@
+
+The warnings referred to in the output occurred because several
+school districts have only one high school sampled, making the
+second-stage standard error estimation unreliable.
+
+Specifying a large number of variables is made easier by the \texttt{make.formula} function
+<<>>=
+vars<-names(apiclus1)[c(12:13,16:23,27:37)]
+svymean(make.formula(vars),dclus1,na.rm=TRUE)
+@
+
+Summary statistics for subsets can also be computed with
+\texttt{svyby}. Here we compute the average proportion of ``English
+language learners'' and of students eligible for subsidized school
+meals for elementary, middle, and high schools
+<<>>=
+svyby(~ell+meals, ~stype, design=dclus1, svymean)
+@
+
+
+Regression models show that these socioeconomic variables predict API score and whether the school achieved its API target
+<<>>=
+regmodel <- svyglm(api00~ell+meals,design=dclus1)
+logitmodel <- svyglm(I(sch.wide=="Yes")~ell+meals, design=dclus1, family=quasibinomial())
+summary(regmodel)
+summary(logitmodel)
+@
+
+We can calibrate the sampling using the statewide total for the previous year's API
+<<>>=
+gclus1 <- calibrate(dclus1, formula=~api99, population=c(6194, 3914069))
+@
+which improves estimation of some quantities
+<<>>=
+svymean(~api00, gclus1)
+svyquantile(~api00, gclus1, quantile=c(0.25,0.5,0.75), ci=TRUE)
+svytotal(~stype, gclus1)
+svytotal(~enroll, gclus1)
+svyratio(~api.stu,~enroll, gclus1)
+@
+
+
+\end{document}