[r-cran-eco] 22/30: New upstream version 4.0-1
Andreas Tille
tille at debian.org
Thu Sep 7 07:20:59 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-eco.
commit d02b69d7b263bc673463c84af14fb7a7a8d6212c
Author: Andreas Tille <tille at debian.org>
Date: Thu Sep 7 09:05:37 2017 +0200
New upstream version 4.0-1
---
ChangeLog | 1 +
DESCRIPTION | 32 ++--
MD5 | 90 ++++++-----
NAMESPACE | 4 +-
R/Qfun.R | 29 ++++
R/census.R | 28 ++++
R/eco.R | 163 +++++++++++++++++++
R/ecoBD.R | 102 ++++++++++++
R/ecoNP.R | 167 +++++++++++++++++++
R/emeco.R | 209 +++++++++++++++++++++++-
R/forgnlit30.R | 26 +++
R/forgnlit30c.R | 31 ++++
R/housep88.R | 32 ++++
R/predict.eco.R | 44 +++++
R/predict.ecoNP.R | 47 ++++++
R/predict.ecoNPX.R | 50 ++++++
R/predict.ecoX.R | 50 ++++++
R/print.summary.eco.R | 31 ++++
R/print.summary.ecoML.R | 49 ++++++
R/print.summary.ecoNP.R | 32 ++++
R/reg.R | 25 +++
R/summary.eco.R | 42 +++++
R/summary.ecoML.R | 61 ++++++-
R/summary.ecoNP.R | 43 +++++
R/wallace.R | 30 ++++
man/Qfun.Rd | 66 ++++----
man/census.Rd | 53 +++---
man/eco.Rd | 328 ++++++++++++++++++-------------------
man/ecoBD.Rd | 209 +++++++++++-------------
man/ecoML.Rd | 394 ++++++++++++++++++++++-----------------------
man/ecoNP.Rd | 308 +++++++++++++++++------------------
man/forgnlit30.Rd | 47 +++---
man/forgnlit30c.Rd | 51 +++---
man/housep88.Rd | 61 +++----
man/predict.eco.Rd | 123 ++++++--------
man/predict.ecoNP.Rd | 126 +++++++--------
man/predict.ecoNPX.Rd | 72 +++++++++
man/predict.ecoX.Rd | 72 +++++++++
man/print.summary.eco.Rd | 46 ++++++
man/print.summary.ecoML.Rd | 63 ++++++++
man/print.summary.ecoNP.Rd | 48 ++++++
man/reg.Rd | 46 +++---
man/summary.eco.Rd | 109 ++++++-------
man/summary.ecoML.Rd | 139 ++++++++--------
man/summary.ecoNP.Rd | 107 ++++++------
man/wallace.Rd | 51 +++---
src/gibbsEM.c | 6 +-
src/gibbsXBase.c | 2 +-
src/gibbsZBase.c | 2 +-
src/init.c | 40 +++++
src/preBaseX.c | 2 +-
src/preDPX.c | 2 +-
52 files changed, 2731 insertions(+), 1260 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 50e4988..278252f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,4 @@
+4.0-1 05.10.17 Roxygen2 compliant, C functions registered
3.1-7 03.04.15 minor fixes
3.1-6 06.12.12 minor fixes
3.1-5 05.29.12 minor fixes
diff --git a/DESCRIPTION b/DESCRIPTION
index 8d0ee5c..976584a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,14 +1,18 @@
Package: eco
-Version: 3.1-7
-Date: 2015-3-4
+Version: 4.0-1
+Date: 2017-7-26
Title: Ecological Inference in 2x2 Tables
-Author: Kosuke Imai <kimai at princeton.edu>,
- Ying Lu <yl46 at nyu.edu>,
- Aaron B Strauss <aaronbstrauss at gmail.com>.
-Maintainer: Ying Lu <yl46 at nyu.edu>
+Authors at R: c(
+ person("Kosuke", "Imai", , "kimai at Princeton.Edu", c("aut")),
+ person("Ying", "Lu", , "ying.lu at nyu.edu", c("aut", "cre")),
+ person("Aaron", "Strauss", , "aaronbstrauss at gmail.com", c("aut")),
+ person("Hubert", "Jin", , "hubertj at princeton.edu", c("ctb"))
+ )
+Maintainer: Ying Lu <ying.lu at nyu.edu>
Depends: R (>= 2.0), MASS, utils
-Description: We implement the Bayesian and likelihood methods proposed
- in Imai, Lu, and Strauss (2008, 2011) for ecological inference in 2
+Description: Implements the Bayesian and likelihood methods proposed
+ in Imai, Lu, and Strauss (2008 <DOI:10.1093/pan/mpm017>) and
+ (2011 <DOI:10.18637/jss.v042.i05>) for ecological inference in 2
by 2 tables as well as the method of bounds introduced by Duncan and
Davis (1953). The package fits both parametric and nonparametric
models using either the Expectation-Maximization algorithms (for
@@ -22,8 +26,14 @@ Description: We implement the Bayesian and likelihood methods proposed
LazyLoad: yes
LazyData: yes
License: GPL (>= 2)
-URL: http://imai.princeton.edu/software/eco.html
-Packaged: 2015-03-05 04:04:38 UTC; kimai
+URL: https://github.com/kosukeimai/eco
+BugReports: https://github.com/kosukeimai/eco/issues
+RoxygenNote: 6.0.1
NeedsCompilation: yes
+Packaged: 2017-07-27 03:00:22 UTC; kimai
+Author: Kosuke Imai [aut],
+ Ying Lu [aut, cre],
+ Aaron Strauss [aut],
+ Hubert Jin [ctb]
Repository: CRAN
-Date/Publication: 2015-03-19 09:46:08
+Date/Publication: 2017-08-01 05:24:50 UTC
diff --git a/MD5 b/MD5
index 716cbc7..f16eafb 100644
--- a/MD5
+++ b/MD5
@@ -1,35 +1,41 @@
-0efc2b6fe49b5092654611007d140f1b *ChangeLog
-e5fb4bacf6fa9326d5a8d5af7bc9b9ef *DESCRIPTION
-848dcabf0d80bb915d0ff13dedb91cda *NAMESPACE
-a12c92eb0ddc56e282ad6ff3a990bff1 *R/Qfun.R
+a97e444214f6bc2f0dc91e77059aeb58 *ChangeLog
+69fcff33f43261b9ebb165aee6079d2b *DESCRIPTION
+82661102615f33d350638c7923217ea3 *NAMESPACE
+82bfbbce207fde61df8337e9e842f6a1 *R/Qfun.R
+b74ced70c45cdb899135650f263f04fe *R/census.R
19b54ec2f8a821715be3881948ab4dfd *R/checkdata.R
b739dfc6d215a18af2bbeed867ce8dd2 *R/coef.eco.R
4e0835221af546dde07cc910439fd29c *R/coef.ecoNP.R
-0380a6adab657ee90e7645b9c61eb3ae *R/eco.R
-633e104923566a9d6845f2608b61bc95 *R/ecoBD.R
+c43a1de04f4f229ed5d6617d193413bc *R/eco.R
+fc0afad120e9c0a80cc1db98ff519134 *R/ecoBD.R
ec74fb120755d764a8c07ef534831b50 *R/ecoCV.R
-71b51118a534a8ce46b72edd9605f521 *R/ecoNP.R
+005a4d42a4789517caf062a33011abae *R/ecoNP.R
d571c7dcb6ffae9532bff0b392edd3a1 *R/ecoRC.R
-5b315d021447532f4ccdd8ff22ef1389 *R/emeco.R
+42549ecc00ca7e5580315616518b8709 *R/emeco.R
6c644c7e6e990443dcf79b10722e8180 *R/eminfo.R
+0ecadfff0304da67d092fea6ae6e7e9d *R/forgnlit30.R
+6460331fb0043c2c77d012e4a0a89830 *R/forgnlit30c.R
+fedd2983b7afaea7298382918e90115d *R/housep88.R
8b136280b6d870259d087afe9fb8f5c6 *R/logit.R
74be3d5191777b2fd500f602a998e000 *R/onAttach.R
-d0c94137a617835be0f5511e270d080a *R/predict.eco.R
-435be8169cf6b3f72b393dbe0c9a958e *R/predict.ecoNP.R
-aceb01f7d2bc7917dbb13d8928a3caf1 *R/predict.ecoNPX.R
-a5a193d0e7084dae925a60ebac7a7b46 *R/predict.ecoX.R
+943ad8b1c83e94027885bd15dccec97f *R/predict.eco.R
+211fd8465e7d1832c720f7af2b41a217 *R/predict.ecoNP.R
+ecc1895271f0c80d1e439d264250c2d7 *R/predict.ecoNPX.R
+5b19a3e918ccff3c9969c25226b4803b *R/predict.ecoX.R
ec3456b70b939df343420cc1d3f6aa14 *R/print.eco.R
076356bd800db18b690efb01af2cc8ba *R/print.ecoBD.R
14bcd129eb23ba5341fbc7e49d53c121 *R/print.ecoML.R
-800b946b39952f29af0798d8f1c1775e *R/print.summary.eco.R
-6dc5abe1a4b05d43977cad038929717a *R/print.summary.ecoML.R
-5e9703d464de9e121513aa88f3b41248 *R/print.summary.ecoNP.R
+395905d2e7ab1b95fbdf638d7daf1968 *R/print.summary.eco.R
+da8f571bd9e956968e9fbde845fd79a6 *R/print.summary.ecoML.R
+0f94822aeda6d553bddcecad380921e4 *R/print.summary.ecoNP.R
053e97b9a3e773e0f451394ba26e1f52 *R/print.summary.predict.eco.R
-4197ad73e0cda8e322624b103181f24d *R/summary.eco.R
-1d215cc25dc2d076dfcc855e3b9ac032 *R/summary.ecoML.R
-9179509565faf9d20126af8a8ce15472 *R/summary.ecoNP.R
+c0265cba5df4817d065bc0ea179351ac *R/reg.R
+b9b3e61083ba6cc7ad26d906f29d55f8 *R/summary.eco.R
+6c6e77fffcc64accc9d1cd821db5f2c7 *R/summary.ecoML.R
+4f9a6fac61c66f10773b2798fa9cf249 *R/summary.ecoNP.R
b16954c324c6f0cda61d97a97e58c626 *R/summary.predict.eco.R
c3b40e569a714b410c08a60c3881db57 *R/varcov.R
+0d0a29d123b6cefc03ac008d319ee66f *R/wallace.R
4999fe2d5c10c20ef7953c519ea4b864 *data/census.txt.gz
bab848bf01c09ff663551d74a091aacb *data/forgnlit30.txt.gz
72a68f958f3d09b6bfbc57ac2999c31c *data/forgnlit30c.txt.gz
@@ -37,22 +43,27 @@ bab848bf01c09ff663551d74a091aacb *data/forgnlit30.txt.gz
997a61242cc887b3b0e7167b850dc5fd *data/reg.txt.gz
2329fbd925f12169a0b92cbb3bc8863d *data/wallace.txt.gz
6762185d8bce0126591ba4a32c5dd39c *inst/CITATION
-782b7eb01b5171c0656078aa0cd3c1c1 *man/Qfun.Rd
-1802c44a46ce275ca7b7abb6a1949f4a *man/census.Rd
-c858035b463b515ba9f808531665ffd7 *man/eco.Rd
-ba3be2c488597029a463f50cfdc4a183 *man/ecoBD.Rd
-5a5f4fc02cb0ec538d64c462785b3949 *man/ecoML.Rd
-d5bb914cc1bf288d3bb94d25308c3f4e *man/ecoNP.Rd
-b8c243e39c2a1d88d8dbc8b383364757 *man/forgnlit30.Rd
-b0cad00cf2f9b995d3e91dc5d36402ba *man/forgnlit30c.Rd
-5d902872834f60a3eba7eedaf0c3302d *man/housep88.Rd
-08ece1caeeb95b698b367841eaa63757 *man/predict.eco.Rd
-f91815b61c41797a0170b20ae8bdaacc *man/predict.ecoNP.Rd
-a3f368fa54b74a5cc31dc0a6faa3299b *man/reg.Rd
-bde5fe44fe6ff364eb802507d6d98d2d *man/summary.eco.Rd
-5f674012c4a82858e9bbdc397c7bf50b *man/summary.ecoML.Rd
-b64276d6153b60d016297acb49e5e386 *man/summary.ecoNP.Rd
-13a44404b6affc8437796b1b3d104a0f *man/wallace.Rd
+24e77ebb2f3a53c3394c2ccb99a48fea *man/Qfun.Rd
+7b20faf0e8f3ec97bbac0320bd3a3c10 *man/census.Rd
+900ba355c27630ab275274f86bcd4784 *man/eco.Rd
+9befa053e1b48ad1c4aa21db9002643c *man/ecoBD.Rd
+10f78bb22d8a352f2781eb68988c8668 *man/ecoML.Rd
+c1c71c88af648e9ebc1c5b45cf5ce506 *man/ecoNP.Rd
+c520fd06d22c76c92a45ae84bd520691 *man/forgnlit30.Rd
+cc4ba227d20570081def3f9830151e9f *man/forgnlit30c.Rd
+d025fe7ff575d074d108f09aa5e80002 *man/housep88.Rd
+19323471a8664780b2eb9465b55475b8 *man/predict.eco.Rd
+4f423bc5c7fa02c071f4e23eec718c9b *man/predict.ecoNP.Rd
+56675900f901c4008a15172bc7f1a786 *man/predict.ecoNPX.Rd
+58a352241a365fee452945573780d79c *man/predict.ecoX.Rd
+3dd1539fe012fa5480a3821fe5ef1f86 *man/print.summary.eco.Rd
+54f935978fa7cb7ebf19664094327543 *man/print.summary.ecoML.Rd
+bf995b21af9506cd47b2e82776d07fb3 *man/print.summary.ecoNP.Rd
+7f26c7c6bd63a4014b6b5f185b89687b *man/reg.Rd
+524bc89a424071e15f8c1e9135a48dfa *man/summary.eco.Rd
+25c5e39725905952364835a506ffcc54 *man/summary.ecoML.Rd
+92d2d80cb3bc32cd8a446a0bd6b3ba58 *man/summary.ecoNP.Rd
+8b8d3acc9b7559381dd73afce8950724 *man/wallace.Rd
f009e46fcf131d28ea4ead122961b7bd *src/Makevars
61d0335fbb10bbacb4a0c133d1eda041 *src/bayes.c
de17d4ca6e1eadef448d31e5bb278be9 *src/bayes.h
@@ -62,14 +73,15 @@ e4f4765b9dbde486d170894f9084daf5 *src/gibbsBase.c
82a00f75e3c796b5b66e24a5b3fc1198 *src/gibbsBase2C.c
cd03908bee96f1538c50e907fcd5eb7b *src/gibbsBaseRC.c
33f0950281de3da321bfe80cac8e00c9 *src/gibbsDP.c
-83ea2ab7d26ed9f7a82d18922d4eb24f *src/gibbsEM.c
-7782c2cdfec2269ecddc0ddc56d23b87 *src/gibbsXBase.c
+0f4793b8a4f4ca98a88e5b3ca596072f *src/gibbsEM.c
+48c80c9da450292d7566da2dc4bcc1db *src/gibbsXBase.c
1b532d75ab6bdb2439666141b3d81c6f *src/gibbsXDP.c
-a156b108399967722872d26c50897f8a *src/gibbsZBase.c
+9838dce871783dc1e06daee8da07a315 *src/gibbsZBase.c
+4b04b59b076f6ce11a7977f09892fc99 *src/init.c
c24852e22728b2f506134dd8221e522f *src/macros.h
-787249c325d6acec9d8630ceddb7b923 *src/preBaseX.c
+1f95f3a7a65183b82d0fb016496dc84f *src/preBaseX.c
b0dd2bd8e7ed47ae7d5327055b8b92ca *src/preDP.c
-eda210e63b5ba5c09d54ab8f69d999b7 *src/preDPX.c
+304c6f667aed46416a1336b67ce9cb49 *src/preDPX.c
fcb06c890afff62b4b0e2513a64c5a61 *src/rand.c
9a7e8d0aaa99088d05813349cf590f07 *src/rand.h
fc17ca85ae3c58d8730cfec91fdc06dc *src/sample.c
diff --git a/NAMESPACE b/NAMESPACE
index 3911b0d..6a7ea71 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,8 @@
-useDynLib(eco)
+useDynLib(eco, .registration = TRUE)
importFrom(MASS, mvrnorm)
+importFrom("stats", "as.formula", "coef", "model.frame", "model.matrix", "model.response", "predict", "quantile", "sd", "terms", "weighted.mean")
+importFrom("utils", "packageDescription")
export(eco,
ecoBD,
diff --git a/R/Qfun.R b/R/Qfun.R
index a6905ab..8685c4a 100644
--- a/R/Qfun.R
+++ b/R/Qfun.R
@@ -1,3 +1,32 @@
+#' Fitting the Parametric Bayesian Model of Ecological Inference in 2x2 Tables
+#'
+#' \code{Qfun} returns the complete log-likelihood that is used to calculate
+#' the fraction of missing information.
+#'
+#'
+#' @param theta A vector that contains the MLE \eqn{E(W_1)},\eqn{E(W_2)},
+#' \eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. Typically it is the
+#' element \code{theta.em} of an object of class \code{ecoML}.
+#' @param suff.stat A vector of sufficient statistics of \eqn{E(W_1)},
+#' \eqn{E(W_2)}, \eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}.
+#' @param n An integer representing the sample size.
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}; Aaron Strauss,
+#' Department of Politics, Princeton University,
+#' \email{abstraus@@Princeton.Edu}.
+#' @seealso \code{ecoML}
+#' @references Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R
+#' Package for Ecological Inference in 2x2 Tables} Journal of Statistical
+#' Software, Vol. 42, No. 5, pp. 1-23. available at
+#' \url{http://imai.princeton.edu/software/eco.html}
+#'
+#' Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
+#' Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
+#' Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available
+#' at \url{http://imai.princeton.edu/research/eiall.html}
+#' @keywords models
Qfun <- function(theta, suff.stat, n) {
mu<-rep(0,2)
Sigma<-matrix(0, 2,2)
diff --git a/R/census.R b/R/census.R
new file mode 100644
index 0000000..73e102c
--- /dev/null
+++ b/R/census.R
@@ -0,0 +1,28 @@
+
+
+#' Black Illiteracy Rates in 1910 US Census
+#'
+#' This data set contains the proportion of the residents who are black, the
+#' proportion of those who can read, the total population as well as the actual
+#' black literacy rate and white literacy rate for 1040 counties in the US. The
+#' dataset was originally analyzed by Robinson (1950) at the state level. King
+#' (1997) recoded the 1910 census at county level. The data set only includes
+#' those who are older than 10 years of age.
+#'
+#'
+#' @name census
+#' @docType data
+#' @format A data frame containing 5 variables and 1040 observations
+#' \tabular{lll}{ X \tab numeric \tab the proportion of Black residents in each
+#' county\cr Y \tab numeric \tab the overall literacy rates in each county\cr N
+#' \tab numeric \tab the total number of residents in each county \cr W1 \tab
+#' numeric \tab the actual Black literacy rate \cr W2 \tab numeric \tab the
+#' actual White literacy rate }
+#' @references Robinson, W.S. (1950). \dQuote{Ecological Correlations and the
+#' Behavior of Individuals.} \emph{American Sociological Review}, vol. 15,
+#' pp.351-357. \cr \cr King, G. (1997). \dQuote{A Solution to the Ecological
+#' Inference Problem: Reconstructing Individual Behavior from Aggregate Data}.
+#' Princeton University Press, Princeton, NJ.
+#' @keywords datasets
+NULL
+
diff --git a/R/eco.R b/R/eco.R
index 2bd3b32..5a613a7 100644
--- a/R/eco.R
+++ b/R/eco.R
@@ -1,3 +1,166 @@
+#' Fitting the Parametric Bayesian Model of Ecological Inference in 2x2 Tables
+#'
+#' \code{eco} is used to fit the parametric Bayesian model (based on a
+#' Normal/Inverse-Wishart prior) for ecological inference in \eqn{2 \times 2}
+#' tables via Markov chain Monte Carlo. It gives the in-sample predictions as
+#' well as the estimates of the model parameters. The model and algorithm are
+#' described in Imai, Lu and Strauss (2008, 2011).
+#'
+#' An example of \eqn{2 \times 2} ecological table for racial voting is given
+#' below: \tabular{llccc}{ \tab \tab black voters \tab white voters \tab \cr
+#' \tab vote \tab \eqn{W_{1i}} \tab \eqn{W_{2i}} \tab \eqn{Y_i} \cr \tab not
+#' vote \tab \eqn{1-W_{1i}} \tab \eqn{1-W_{2i}} \tab \eqn{1-Y_i} \cr \tab \tab
+#' \eqn{X_i} \tab \eqn{1-X_i} \tab } where \eqn{Y_i} and \eqn{X_i} represent
+#' the observed margins, and \eqn{W_1} and \eqn{W_2} are unknown variables. In
+#' this example, \eqn{Y_i} is the turnout rate in the ith precinct, \eqn{X_i} is
+#' the proportion of African Americans in the ith precinct. The unknowns
+#' \eqn{W_{1i}} and \eqn{W_{2i}} are the black and white turnout, respectively.
+#' All variables are proportions and hence bounded between 0 and 1. For each
+#' \eqn{i}, the following deterministic relationship holds, \eqn{Y_i=X_i
+#' W_{1i}+(1-X_i)W_{2i}}.
+#'
+#' @param formula A symbolic description of the model to be fit, specifying the
+#' column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~ X}
+#' specifies \code{Y} as the column margin (e.g., turnout) and \code{X} as the
+#' row margin (e.g., percent African-American). Details and specific examples
+#' are given below.
+#' @param data An optional data frame in which to interpret the variables in
+#' \code{formula}. The default is the environment in which \code{eco} is
+#' called.
+#' @param N An optional variable representing the size of the unit; e.g., the
+#' total number of voters. \code{N} needs to be a vector of same length as
+#' \code{Y} and \code{X} or a scalar.
+#' @param supplement An optional matrix of supplemental data. The matrix has
+#' two columns, which contain additional individual-level data such as survey
+#' data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no
+#' additional individual-level data are included in the model. The default is
+#' \code{NULL}.
+#' @param context Logical. If \code{TRUE}, the contextual effect is also
+#' modeled, that is to assume the row margin \eqn{X} and the unknown \eqn{W_1}
+#' and \eqn{W_2} are correlated. See Imai, Lu and Strauss (2008, 2011) for
+#' details. The default is \code{FALSE}.
+#' @param mu0 A scalar or a numeric vector that specifies the prior mean for
+#' the mean parameter \eqn{\mu} for \eqn{(W_1,W_2)} (or for \eqn{(W_1, W_2, X)}
+#' if \code{context=TRUE}). When the input of \code{mu0} is a scalar, its value
+#' will be repeated to yield a vector of the length of \eqn{\mu}, otherwise, it
+#' needs to be a vector of same length as \eqn{\mu}. When \code{context=TRUE},
+#' the length of \eqn{\mu} is 3, otherwise it is 2. The default is \code{0}.
+#' @param tau0 A positive integer representing the scale parameter of the
+#' Normal-Inverse Wishart prior for the mean and variance parameter \eqn{(\mu,
+#' \Sigma)}. The default is \code{2}.
+#' @param nu0 A positive integer representing the prior degrees of freedom of
+#' the Normal-Inverse Wishart prior for the mean and variance parameter
+#' \eqn{(\mu, \Sigma)}. The default is \code{4}.
+#' @param S0 A positive scalar or a positive definite matrix that specifies the
+#' prior scale matrix of the Normal-Inverse Wishart prior for the mean and
+#' variance parameter \eqn{(\mu, \Sigma)} . If it is a scalar, then the prior
+#' scale matrix will be a diagonal matrix with the same dimensions as
+#' \eqn{\Sigma} and the diagonal elements all take value of \code{S0},
+#' otherwise \code{S0} needs to have same dimensions as \eqn{\Sigma}. When
+#' \code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times 3} matrix, otherwise,
+#' it is \eqn{2 \times 2}. The default is \code{10}.
+#' @param mu.start A scalar or a numeric vector that specifies the starting
+#' values of the mean parameter \eqn{\mu}. If it is a scalar, then its value
+#' will be repeated to yield a vector of the length of \eqn{\mu}, otherwise, it
+#' needs to be a vector of same length as \eqn{\mu}. When
+#' \code{context=FALSE}, the length of \eqn{\mu} is 2, otherwise it is 3. The
+#' default is \code{0}.
+#' @param Sigma.start A scalar or a positive definite matrix that specifies the
+#' starting value of the variance matrix \eqn{\Sigma}. If it is a scalar, then
+#' the starting value will be a diagonal matrix with the same dimensions as
+#' \eqn{\Sigma} and the diagonal elements all take value of \code{Sigma.start},
+#' otherwise \code{Sigma.start} needs to have same dimensions as \eqn{\Sigma}. When
+#' \code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times 3} matrix, otherwise,
+#' it is \eqn{2 \times 2}. The default is \code{10}.
+#' @param parameter Logical. If \code{TRUE}, the Gibbs draws of the population
+#' parameters, \eqn{\mu} and \eqn{\Sigma}, are returned in addition to the
+#' in-sample predictions of the missing internal cells, \eqn{W}. The default is
+#' \code{TRUE}.
+#' @param grid Logical. If \code{TRUE}, the grid method is used to sample
+#' \eqn{W} in the Gibbs sampler. If \code{FALSE}, the Metropolis algorithm is
+#' used where candidate draws are sampled from the uniform distribution on the
+#' tomography line for each unit. Note that the grid method is significantly
+#' slower than the Metropolis algorithm. The default is \code{FALSE}.
+#' @param n.draws A positive integer. The number of MCMC draws. The default is
+#' \code{5000}.
+#' @param burnin A positive integer. The burnin interval for the Markov chain;
+#' i.e. the number of initial draws that should not be stored. The default is
+#' \code{0}.
+#' @param thin A positive integer. The thinning interval for the Markov chain;
+#' i.e. the number of Gibbs draws between the recorded values that are skipped.
+#' The default is \code{0}.
+#' @param verbose Logical. If \code{TRUE}, the progress of the Gibbs sampler is
+#' printed to the screen. The default is \code{FALSE}.
+#' @return An object of class \code{eco} containing the following elements:
+#' \item{call}{The matched call.}
+#' \item{X}{The row margin, \eqn{X}.}
+#' \item{Y}{The column margin, \eqn{Y}.}
+#' \item{N}{The size of each table, \eqn{N}.}
+#' \item{burnin}{The number of initial burnin draws.}
+#' \item{thin}{The thinning interval.}
+#' \item{nu0}{The prior degrees of freedom.}
+#' \item{tau0}{The prior scale parameter.}
+#' \item{mu0}{The prior mean.}
+#' \item{S0}{The prior scale matrix.}
+#' \item{W}{A three dimensional array storing the posterior in-sample predictions of \eqn{W}.
+#' The first dimension indexes the Monte Carlo draws, the second dimension indexes the
+#' columns of the table, and the third dimension represents the observations.}
+#' \item{Wmin}{A numeric matrix storing the lower bounds of \eqn{W}.}
+#' \item{Wmax}{A numeric matrix storing the upper bounds of \eqn{W}.} The
+#' following additional elements are included in the output when
+#' \code{parameter = TRUE}.
+#' \item{mu}{The posterior draws of the population mean parameter, \eqn{\mu}.}
+#' \item{Sigma}{The posterior draws of the population variance matrix, \eqn{\Sigma}.}
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying
+#' Lu, Center for Promoting Research Involving Innovative Statistical
+#' Methodology (PRIISM), New York University, \email{ying.lu@@nyu.Edu}
+#' @seealso \code{ecoML}, \code{ecoNP}, \code{predict.eco}, \code{summary.eco}
+#' @references Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R
+#' Package for Ecological Inference in 2x2 Tables} Journal of Statistical
+#' Software, Vol. 42, No. 5, pp. 1-23. available at
+#' \url{http://imai.princeton.edu/software/eco.html}
+#'
+#' Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
+#' Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
+#' Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available
+#' at \url{http://imai.princeton.edu/research/eiall.html}
+#' @keywords models
+#' @examples
+#'
+#'
+#' ## load the registration data
+#' \dontrun{data(reg)
+#'
+#' ## NOTE: convergence has not been properly assessed for the following
+#' ## examples. See Imai, Lu and Strauss (2008, 2011) for more
+#' ## complete analyses.
+#'
+#' ## fit the parametric model with the default prior specification
+#' res <- eco(Y ~ X, data = reg, verbose = TRUE)
+#' ## summarize the results
+#' summary(res)
+#'
+#' ## obtain out-of-sample prediction
+#' out <- predict(res, verbose = TRUE)
+#' ## summarize the results
+#' summary(out)
+#'
+#' ## load the Robinson's census data
+#' data(census)
+#'
+#' ## fit the parametric model with contextual effects and N
+#' ## using the default prior specification
+#' res1 <- eco(Y ~ X, N = N, context = TRUE, data = census, verbose = TRUE)
+#' ## summarize the results
+#' summary(res1)
+#'
+#' ## obtain out-of-sample prediction
+#' out1 <- predict(res1, verbose = TRUE)
+#' ## summarize the results
+#' summary(out1)
+#' }
+#'
eco <- function(formula, data = parent.frame(), N = NULL, supplement = NULL,
context = FALSE, mu0 = 0, tau0 = 2, nu0 = 4, S0 = 10,
mu.start = 0, Sigma.start = 10, parameter = TRUE,
diff --git a/R/ecoBD.R b/R/ecoBD.R
index ab73696..71389a2 100644
--- a/R/ecoBD.R
+++ b/R/ecoBD.R
@@ -1,3 +1,105 @@
+#' Calculating the Bounds for Ecological Inference in RxC Tables
+#'
+#' \code{ecoBD} is used to calculate the bounds for missing internal cells of
+#' \eqn{R \times C} ecological table. The data can be entered either in the
+#' form of counts or proportions.
+#'
+#' The data may be entered either in the form of counts or proportions. If
+#' proportions are used, \code{formula} may omit the last row and/or column of
+#' tables, which can be calculated from the remaining margins. For example,
+#' \code{Y ~ X} specifies \code{Y} as the first column margin and \code{X} as
+#' the first row margin in \eqn{2 \times 2} tables. If counts are used,
+#' \code{formula} may omit the last row and/or column margin of the table only
+#' if \code{N} is supplied. In this example, the columns will be labeled as
+#' \code{X} and \code{not X}, and the rows will be labeled as \code{Y} and
+#' \code{not Y}.
+#'
+#' For larger tables, one can use \code{cbind()} and \code{+}. For example,
+#' \code{cbind(Y1, Y2, Y3) ~ X1 + X2 + X3 + X4} specifies \eqn{3 \times 4}
+#' tables.
+#'
+#' An \eqn{R \times C} ecological table in the form of counts: \tabular{lcccc}{
+#' \eqn{n_{i11}} \tab \eqn{n_{i12}} \tab \dots{} \tab \eqn{n_{i1C}} \tab
+#' \eqn{n_{i1.}} \cr \eqn{n_{i21}} \tab \eqn{n_{i22}} \tab \dots{} \tab
+#' \eqn{n_{i2C}} \tab \eqn{n_{i2.}} \cr \dots{} \tab \dots{} \tab \dots{} \tab
+#' \dots{} \tab \dots{}\cr \eqn{n_{iR1}} \tab \eqn{n_{iR2}} \tab \dots{} \tab
+#' \eqn{n_{iRC}} \tab \eqn{n_{iR.}} \cr \eqn{n_{i.1}} \tab \eqn{n_{i.2}} \tab
+#' \dots{} \tab \eqn{n_{i.C}} \tab \eqn{N_i} } where \eqn{n_{ir.}} and
+#' \eqn{n_{i.c}} represent the observed margins, \eqn{N_i} represents the size
+#' of the table, and \eqn{n_{irc}} are unknown variables. Note that for each
+#' \eqn{i}, the following deterministic relationships hold; \eqn{n_{ir.} =
+#' \sum_{c=1}^C n_{irc}} for \eqn{r=1,\dots,R}, and \eqn{n_{i.c}=\sum_{r=1}^R
+#' n_{irc}} for \eqn{c=1,\dots,C}. Then, each of the unknown inner cells can be
+#' bounded in the following manner, \deqn{\max(0, n_{ir.}+n_{i.c}-N_i) \le
+#' n_{irc} \le \min(n_{ir.}, n_{i.c}).} If the size of tables, \code{N}, is
+#' provided,
+#'
+#' An \eqn{R \times C} ecological table in the form of proportions:
+#' \tabular{lcccc}{ \eqn{W_{i11}} \tab \eqn{W_{i12}} \tab \dots{} \tab
+#' \eqn{W_{i1C}} \tab \eqn{Y_{i1}} \cr \eqn{W_{i21}} \tab \eqn{W_{i22}} \tab
+#' \dots{} \tab \eqn{W_{i2C}} \tab \eqn{Y_{i2}} \cr \dots{} \tab \dots{} \tab
+#' \dots{} \tab \dots{} \tab \dots{} \cr \eqn{W_{iR1}} \tab \eqn{W_{iR2}} \tab
+#' \dots{} \tab \eqn{W_{iRC}} \tab \eqn{Y_{iR}} \cr \eqn{X_{i1}} \tab
+#' \eqn{X_{i2}} \tab \dots{} \tab \eqn{X_{iC}} \tab } where \eqn{Y_{ir}} and
+#' \eqn{X_{ic}} represent the observed margins, and \eqn{W_{irc}} are unknown
+#' variables. Note that for each \eqn{i}, the following deterministic
+#' relationships hold; \eqn{Y_{ir} = \sum_{c=1}^C X_{ic} W_{irc}} for
+#' \eqn{r=1,\dots,R}, and \eqn{\sum_{r=1}^R W_{irc}=1} for \eqn{c=1,\dots,C}.
+#' Then, each of the inner cells of the table can be bounded in the following
+#' manner, \deqn{\max(0, (X_{ic} + Y_{ir}-1)/X_{ic}) \le W_{irc} \le \min(1,
+#' Y_{ir}/X_{ic}).}
+#'
+#' @param formula A symbolic description of ecological table to be used,
+#' specifying the column and row margins of \eqn{R \times C} ecological tables.
+#' Details and specific examples are given below.
+#' @param data An optional data frame in which to interpret the variables in
+#' \code{formula}. The default is the environment in which \code{ecoBD} is
+#' called.
+#' @param N An optional variable representing the size of the unit; e.g., the
+#' total number of voters. If \code{formula} is entered as counts and the last
+#' row and/or column is omitted, this input is necessary.
+#' @return An object of class \code{ecoBD} containing the following elements
+#' (When three dimensional arrays are used, the first dimension indexes the
+#' observations, the second dimension indexes the row numbers, and the third
+#' dimension indexes the column numbers):
+#' \item{call}{The matched call.}
+#' \item{X}{A matrix of the observed row margin, \eqn{X}.}
+#' \item{Y}{A matrix of the observed column margin, \eqn{Y}.}
+#' \item{N}{A vector of the size of ecological tables, \eqn{N}.}
+#' \item{aggWmin}{A three dimensional array of
+#' aggregate lower bounds for proportions.}
+#' \item{aggWmax}{A three dimensional array of aggregate upper bounds for proportions.}
+#' \item{Wmin}{A three dimensional array of lower bounds for proportions.}
+#' \item{Wmax}{A three dimensional array of upper bounds for proportions.}
+#' \item{Nmin}{A three dimensional array of lower bounds for counts.}
+#' \item{Nmax}{A three dimensional array of upper bounds for counts.} The object
+#' can be printed through \code{print.ecoBD}.
+#' @author Kosuke Imai, Department of Politics, Princeton University
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu/}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}
+#' @seealso \code{eco}, \code{ecoNP}
+#' @references Imai, Kosuke, Ying Lu and Aaron Strauss. (2011) \dQuote{eco: R
+#' Package for Ecological Inference in 2x2 Tables} Journal of Statistical
+#' Software, Vol. 42, No. 5, pp. 1-23. available at
+#' \url{http://imai.princeton.edu/software/eco.html}
+#'
+#' Imai, Kosuke, Ying Lu and Aaron Strauss. (2008) \dQuote{Bayesian and
+#' Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
+#' Approach} Political Analysis, Vol. 16, No. 1, (Winter), pp. 41-69.
+#' available at \url{http://imai.princeton.edu/research/eiall.html}
+#' @keywords models
+#' @examples
+#'
+#'
+#' ## load the registration data
+#' data(reg)
+#'
+#' ## calculate the bounds
+#' res <- ecoBD(Y ~ X, N = N, data = reg)
+#' ## print the results
+#' print(res)
+#'
ecoBD <- function(formula, data = parent.frame(), N=NULL){
mf <- match.call()
tt <- terms(formula)
diff --git a/R/ecoNP.R b/R/ecoNP.R
index fb32210..030613d 100644
--- a/R/ecoNP.R
+++ b/R/ecoNP.R
@@ -1,3 +1,170 @@
+#' Fitting the Nonparametric Bayesian Models of Ecological Inference in 2x2
+#' Tables
+#'
+#' \code{ecoNP} is used to fit the nonparametric Bayesian model (based on a
+#' Dirichlet process prior) for ecological inference in \eqn{2 \times 2} tables
+#' via Markov chain Monte Carlo. It gives the in-sample predictions as well as
+#' out-of-sample predictions for population inference. The models and
+#' algorithms are described in Imai, Lu and Strauss (2008, 2011).
+#'
+#'
+#' @param formula A symbolic description of the model to be fit, specifying the
+#' column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~ X}
+#' specifies \code{Y} as the column margin (e.g., turnout) and \code{X} as the
+#' row margin (e.g., percent African-American). Details and specific examples
+#' are given below.
+#' @param data An optional data frame in which to interpret the variables in
+#' \code{formula}. The default is the environment in which \code{ecoNP} is
+#' called.
+#' @param N An optional variable representing the size of the unit; e.g., the
+#' total number of voters. \code{N} needs to be a vector of same length as
+#' \code{Y} and \code{X} or a scalar.
+#' @param supplement An optional matrix of supplemental data. The matrix has
+#' two columns, which contain additional individual-level data such as survey
+#' data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no
+#' additional individual-level data are included in the model. The default is
+#' \code{NULL}.
+#' @param context Logical. If \code{TRUE}, the contextual effect is also
+#' modeled, that is to assume the row margin \eqn{X} and the unknown \eqn{W_1}
+#' and \eqn{W_2} are correlated. See Imai, Lu and Strauss (2008, 2011) for
+#' details. The default is \code{FALSE}.
+#' @param mu0 A scalar or a numeric vector that specifies the prior mean for
+#' the mean parameter \eqn{\mu} of the base prior distribution \eqn{G_0} (see
+#' Imai, Lu and Strauss (2008, 2011) for detailed descriptions of Dirichlet
+#' prior and the normal base prior distribution). If it is a scalar, then its
+#' value will be repeated to yield a vector of the length of \eqn{\mu},
+#' otherwise, it needs to be a vector of same length as \eqn{\mu}. When
+#' \code{context=TRUE}, the length of \eqn{\mu} is 3, otherwise it is 2. The
+#' default is \code{0}.
+#' @param tau0 A positive integer representing the scale parameter of the
+#' Normal-Inverse Wishart prior for the mean and variance parameter
+#' \eqn{(\mu_i, \Sigma_i)} of each observation. The default is \code{2}.
+#' @param nu0 A positive integer representing the prior degrees of freedom of
+#' the variance matrix \eqn{\Sigma_i}. The default is \code{4}.
+#' @param S0 A positive scalar or a positive definite matrix that specifies the
+#' prior scale matrix for the variance matrix \eqn{\Sigma_i}. If it is a
+#' scalar, then the prior scale matrix will be a diagonal matrix with the same
+#' dimensions as \eqn{\Sigma_i} and the diagonal elements all take value of
+#' \code{S0}, otherwise \code{S0} needs to have same dimensions as
+#' \eqn{\Sigma_i}. When \code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times 3}
+#' matrix, otherwise, it is \eqn{2 \times 2}. The default is \code{10}.
+#' @param alpha A positive scalar representing a user-specified fixed value of
+#' the concentration parameter, \eqn{\alpha}. If \code{NULL}, \eqn{\alpha} will
+#' be updated at each Gibbs draw, and its prior parameters \code{a0} and
+#' \code{b0} need to be specified. The default is \code{NULL}.
+#' @param a0 A positive integer representing the value of shape parameter of
+#' the gamma prior distribution for \eqn{\alpha}. The default is \code{1}.
+#' @param b0 A positive scalar representing the value of the scale parameter
+#' of the gamma prior distribution for \eqn{\alpha}. The default is \code{0.1}.
+#' @param parameter Logical. If \code{TRUE}, the Gibbs draws of the population
+#' parameters, \eqn{\mu} and \eqn{\Sigma}, are returned in addition to the
+#' in-sample predictions of the missing internal cells, \eqn{W}. The default is
+#' \code{FALSE}. This needs to be set to \code{TRUE} if one wishes to make
+#' population inferences through \code{predict.eco}. See an example below.
+#' @param grid Logical. If \code{TRUE}, the grid method is used to sample
+#' \eqn{W} in the Gibbs sampler. If \code{FALSE}, the Metropolis algorithm is
+#' used where candidate draws are sampled from the uniform distribution on the
+#' tomography line for each unit. Note that the grid method is significantly
+#' slower than the Metropolis algorithm.
+#' @param n.draws A positive integer. The number of MCMC draws. The default is
+#' \code{5000}.
+#' @param burnin A non-negative integer. The burnin interval for the Markov chain;
+#' i.e. the number of initial draws that should not be stored. The default is
+#' \code{0}.
+#' @param thin A non-negative integer. The thinning interval for the Markov chain;
+#' i.e. the number of Gibbs draws between the recorded values that are skipped.
+#' The default is \code{0}.
+#' @param verbose Logical. If \code{TRUE}, the progress of the Gibbs sampler is
+#' printed to the screen. The default is \code{FALSE}.
+#' @return An object of class \code{ecoNP} containing the following elements:
+#' \item{call}{The matched call.}
+#' \item{X}{The row margin, \eqn{X}.}
+#' \item{Y}{The column margin, \eqn{Y}.}
+#' \item{burnin}{The number of initial burnin draws.}
+#' \item{thin}{The thinning interval.}
+#' \item{nu0}{The prior degrees of freedom.}
+#' \item{tau0}{The prior scale parameter.}
+#' \item{mu0}{The prior mean.}
+#' \item{S0}{The prior scale matrix.}
+#' \item{a0}{The prior shape parameter.}
+#' \item{b0}{The prior scale parameter.}
+#' \item{W}{A three dimensional array storing the posterior in-sample predictions
+#' of \eqn{W}. The first dimension indexes the Monte Carlo draws, the second dimension
+#' indexes the columns of the table, and the third dimension represents the observations.}
+#' \item{Wmin}{A numeric matrix storing the lower bounds of \eqn{W}.}
+#' \item{Wmax}{A numeric matrix storing the upper bounds of \eqn{W}.}
+#' The following additional elements are included in the output when
+#' \code{parameter = TRUE}.
+#' \item{mu}{A three dimensional array storing the
+#' posterior draws of the population mean parameter, \eqn{\mu}. The first
+#' dimension indexes the Monte Carlo draws, the second dimension indexes the
+#' columns of the table, and the third dimension represents the observations.}
+#' \item{Sigma}{A three dimensional array storing the posterior draws of the
+#' population variance matrix, \eqn{\Sigma}. The first dimension indexes the
+#' Monte Carlo draws, the second dimension indexes the parameters, and the
+#' third dimension represents the observations. }
+#' \item{alpha}{The posterior draws of \eqn{\alpha}.}
+#' \item{nstar}{The number of clusters at each Gibbs draw.}
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}
+#' @seealso \code{eco}, \code{ecoML}, \code{predict.eco}, \code{summary.ecoNP}
+#' @references Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R
+#' Package for Ecological Inference in 2x2 Tables} Journal of Statistical
+#' Software, Vol. 42, No. 5, pp. 1-23. available at
+#' \url{http://imai.princeton.edu/software/eco.html}
+#'
+#' Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
+#' Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
+#' Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available
+#' at \url{http://imai.princeton.edu/research/eiall.html}
+#' @keywords models
+#' @examples
+#'
+#'
+#' ## load the registration data
+#' data(reg)
+#'
+#' ## NOTE: We set the number of MCMC draws to be a very small number in
+#' ## the following examples; i.e., convergence has not been properly
+#' ## assessed. See Imai, Lu and Strauss (2006) for more complete examples.
+#'
+#' ## fit the nonparametric model to give in-sample predictions
+#' ## store the parameters to make population inference later
+#' \dontrun{res <- ecoNP(Y ~ X, data = reg, n.draws = 50, param = TRUE, verbose = TRUE)
+#'
+#' ##summarize the results
+#' summary(res)
+#'
+#' ## obtain out-of-sample prediction
+#' out <- predict(res, verbose = TRUE)
+#'
+#' ## summarize the results
+#' summary(out)
+#'
+#' ## density plots of the out-of-sample predictions
+#' par(mfrow=c(2,1))
+#' plot(density(out[,1]), main = "W1")
+#' plot(density(out[,2]), main = "W2")
+#'
+#'
+#' ## load the Robinson's census data
+#' data(census)
+#'
+#' ## fit the nonparametric model with contextual effects and N
+#' ## using the default prior specification
+#'
+#' res1 <- ecoNP(Y ~ X, N = N, context = TRUE, param = TRUE, data = census,
+#' n.draws = 25, verbose = TRUE)
+#'
+#' ## summarize the results
+#' summary(res1)
+#'
+#' ## out-of-sample prediction
+#' pres1 <- predict(res1)
+#' summary(pres1)}
+#'
ecoNP <- function(formula, data = parent.frame(), N = NULL, supplement = NULL,
context = FALSE, mu0 = 0, tau0 = 2, nu0 = 4, S0 = 10,
alpha = NULL, a0 = 1, b0 = 0.1, parameter = FALSE,
diff --git a/R/emeco.R b/R/emeco.R
index 0fc4720..3bc202e 100644
--- a/R/emeco.R
+++ b/R/emeco.R
@@ -1,7 +1,214 @@
-
###
### main function
###
+
+
+#' Fitting Parametric Models and Quantifying Missing Information for Ecological
+#' Inference in 2x2 Tables
+#'
+#' \code{ecoML} is used to fit parametric models for ecological inference in
+#' \eqn{2 \times 2} tables via Expectation Maximization (EM) algorithms. The
+#' data is specified in proportions. At its most basic setting, the algorithm
+#' assumes that the individual-level proportions (i.e., \eqn{W_1} and
+#' \eqn{W_2}) are distributed bivariate normally (after logit transformations).
+#' The function calculates point estimates of the parameters for models based
+#' on different assumptions. The standard errors of the point estimates are
+#' also computed via Supplemented EM algorithms. Moreover, \code{ecoML}
+#' quantifies the amount of missing information associated with each parameter
+#' and allows researchers to examine the impact of missing information on
+#' parameter estimation in ecological inference. The models and algorithms are
+#' described in Imai, Lu and Strauss (2008, 2011).
+#'
+#' When \code{sem} is \code{TRUE}, \code{ecoML} computes the observed-data
+#' information matrix for the parameters of interest based on Supplemented-EM
+#' algorithm. The inverse of the observed-data information matrix can be used
+#' to estimate the variance-covariance matrix for the parameters estimated from
+#' EM algorithms. In addition, it also computes the expected complete-data
+#' information matrix. Based on these two measures, one can further calculate
+#' the fraction of missing information associated with each parameter. See
+#' Imai, Lu and Strauss (2006) for more details about fraction of missing
+#' information.
+#'
+#' Moreover, when \code{hyptest=TRUE}, \code{ecoML} allows one to estimate the
+#' parametric model under the null hypothesis that \code{mu_1=mu_2}. One can
+#' then construct the likelihood ratio test to assess the hypothesis of equal
+#' means. The associated fraction of missing information for the test statistic
+#' can also be calculated. See Imai, Lu and Strauss (2006) for
+#' details.
+#'
+#' @param formula A symbolic description of the model to be fit, specifying the
+#' column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~ X}
+#' specifies \code{Y} as the column margin (e.g., turnout) and \code{X} (e.g.,
+#' percent African-American) as the row margin. Details and specific examples
+#' are given below.
+#' @param data An optional data frame in which to interpret the variables in
+#' \code{formula}. The default is the environment in which \code{ecoML} is
+#' called.
+#' @param N An optional variable representing the size of the unit; e.g., the
+#' total number of voters. \code{N} needs to be a vector of same length as
+#' \code{Y} and \code{X} or a scalar.
+#' @param supplement An optional matrix of supplemental data. The matrix has
+#' two columns, which contain additional individual-level data such as survey
+#' data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no
+#' additional individual-level data are included in the model. The default is
+#' \code{NULL}.
+#' @param fix.rho Logical. If \code{TRUE}, the correlation (when
+#' \code{context=TRUE}) or the partial correlation (when \code{context=FALSE})
+#' between \eqn{W_1} and \eqn{W_2} is fixed through the estimation. For
+#' details, see Imai, Lu and Strauss (2006). The default is \code{FALSE}.
+#' @param context Logical. If \code{TRUE}, the contextual effect is also
+#' modeled. In this case, the row margin (i.e., X) and the individual-level
+#' rates (i.e., \eqn{W_1} and \eqn{W_2}) are assumed to be distributed
+#' tri-variate normally (after logit transformations). See Imai, Lu and Strauss
+#' (2006) for details. The default is \code{FALSE}.
+#' @param sem Logical. If \code{TRUE}, the standard errors of parameter
+#' estimates are estimated via SEM algorithm, as well as the fraction of
+#' missing data. The default is \code{TRUE}.
+#' @param theta.start A numeric vector that specifies the starting values for
+#' the mean, variance, and covariance. When \code{context = FALSE}, the
+#' elements of \code{theta.start} correspond to (\eqn{E(W_1)}, \eqn{E(W_2)},
+#' \eqn{var(W_1)}, \eqn{var(W_2)}, \eqn{cor(W_1,W_2)}). When \code{context =
+#' TRUE}, the elements of \code{theta.start} correspond to (\eqn{E(W_1)},
+#' \eqn{E(W_2)}, \eqn{var(W_1)}, \eqn{var(W_2)}, \eqn{corr(W_1, X)},
+#' \eqn{corr(W_2, X)}, \eqn{corr(W_1,W_2)}). Moreover, when
+#' \code{fix.rho=TRUE}, \eqn{corr(W_1,W_2)} is set to be the correlation
+#' between \eqn{W_1} and \eqn{W_2} when \code{context = FALSE}, and the partial
+#' correlation between \eqn{W_1} and \eqn{W_2} given \eqn{X} when \code{context
+#' = FALSE}. The default is \code{c(0,0,1,1,0)}.
+#' @param epsilon A positive number that specifies the convergence criterion
+#' for EM algorithm. The square root of \code{epsilon} is the convergence
+#' criterion for SEM algorithm. The default is \code{10^(-6)}.
+#' @param maxit A positive integer that specifies the maximum number of iterations
+#' before the convergence criterion is met. The default is \code{1000}.
+#' @param loglik Logical. If \code{TRUE}, the value of the log-likelihood
+#' function at each iteration of EM is saved. The default is \code{TRUE}.
+#' @param hyptest Logical. If \code{TRUE}, model is estimated under the null
+#' hypothesis that means of \eqn{W_1} and \eqn{W_2} are the same. The default is
+#' \code{FALSE}.
+#' @param verbose Logical. If \code{TRUE}, the progress of the EM and SEM
+#' algorithms is printed to the screen. The default is \code{FALSE}.
+#' @return An object of class \code{ecoML} containing the following elements:
+#' \item{call}{The matched call.}
+#' \item{X}{The row margin, \eqn{X}.}
+#' \item{Y}{The column margin, \eqn{Y}.}
+#' \item{N}{The size of each table, \eqn{N}.}
+#' \item{context}{The assumption under which model is estimated. If
+#' \code{context = FALSE}, CAR assumption is adopted and no contextual effect
+#' is modeled. If \code{context = TRUE}, NCAR assumption is adopted, and
+#' contextual effect is modeled.} \item{sem}{Whether SEM algorithm is used to
+#' estimate the standard errors and observed information matrix for the
+#' parameter estimates.}
+#' \item{fix.rho}{Whether the correlation or the partial
+#' correlation between \eqn{W_1} and \eqn{W_2} is fixed in the estimation.}
+#' \item{r12}{If \code{fix.rho = TRUE}, the value that \eqn{corr(W_1, W_2)} is
+#' fixed to.}
+#' \item{epsilon}{The precision criterion for EM convergence.
+#' \eqn{\sqrt{\epsilon}} is the precision criterion for SEM convergence.}
+#' \item{theta.sem}{The ML estimates of \eqn{E(W_1)},\eqn{E(W_2)},
+#' \eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. If \code{context =
+#' TRUE}, \eqn{E(X)},\eqn{cov(W_1,X)}, \eqn{cov(W_2,X)} are also reported.}
+#' \item{W}{In-sample estimation of \eqn{W_1} and \eqn{W_2}.}
+#' \item{suff.stat}{The sufficient statistics for \code{theta.em}.}
+#' \item{iters.em}{Number of EM iterations before convergence is achieved.}
+#' \item{iters.sem}{Number of SEM iterations before convergence is achieved.}
+#' \item{loglik}{The log-likelihood of the model when convergence is achieved.}
+#' \item{loglik.log.em}{A vector saving the value of the log-likelihood
+#' function at each iteration of the EM algorithm.}
+#' \item{mu.log.em}{A matrix saving the unweighted mean estimation of the
+#' logit-transformed individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2})
+#' at each iteration of the EM process.} \item{Sigma.log.em}{A matrix saving the
+#' log of the variance estimation of the logit-transformed individual-level
+#' proportions (i.e., \eqn{W_1} and \eqn{W_2}) at each iteration of EM process.
+#' Note, non-transformed variances are displayed on the screen (when
+#' \code{verbose = TRUE}).}
+#' \item{rho.fisher.em}{A matrix saving the fisher
+#' transformation of the estimation of the correlations between the
+#' logit-transformed individual-level proportions (i.e., \eqn{W_1} and
+#' \eqn{W_2}) at each iteration of EM process. Note, non-transformed
+#' correlations are displayed on the screen (when \code{verbose = TRUE}).}
+#' Moreover, when \code{sem=TRUE}, \code{ecoML} also outputs the following
+#' values:
+#' \item{DM}{The matrix characterizing the rates of convergence of the
+#' EM algorithms. Such information is also used to calculate the observed-data
+#' information matrix.}
+#' \item{Icom}{The (expected) complete data information
+#' matrix estimated via SEM algorithm. When \code{context=FALSE, fix.rho=TRUE},
+#' \code{Icom} is 4 by 4. When \code{context=FALSE, fix.rho=FALSE}, \code{Icom}
+#' is 5 by 5. When \code{context=TRUE}, \code{Icom} is 9 by 9.}
+#' \item{Iobs}{The observed information matrix. The dimension of \code{Iobs}
+#' is same as \code{Icom}.}
+#' \item{Imiss}{The difference between \code{Icom} and \code{Iobs}.
+#' The dimension of \code{Imiss} is same as \code{Icom}.}
+#' \item{Vobs}{The (symmetrized) variance-covariance matrix of the ML parameter
+#' estimates. The dimension of \code{Vobs} is same as \code{Icom}.}
+#' \item{Iobs}{The (expected) complete-data variance-covariance matrix. The
+#' dimension of \code{Iobs} is same as \code{Icom}.}
+#' \item{Vobs.original}{The estimated variance-covariance matrix of the ML parameter
+#' estimates. The dimension of \code{Vobs} is same as \code{Icom}.}
+#' \item{Fmis}{The fraction of missing information associated with each parameter estimation. }
+#' \item{VFmis}{The proportion of increased variance associated with each
+#' parameter estimation due to observed data. }
+#' \item{Ieigen}{The largest eigenvalue of \code{Imiss}.}
+#' \item{Icom.trans}{The complete data information
+#' matrix for the fisher transformed parameters.}
+#' \item{Iobs.trans}{The observed data information matrix for the fisher transformed parameters.}
+#' \item{Fmis.trans}{The fractions of missing information associated with the
+#' fisher transformed parameters.}
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University, \email{ying.lu@@nyu.Edu}; Aaron Strauss,
+#' Department of Politics, Princeton University,
+#' \email{abstraus@@Princeton.Edu}.
+#' @seealso \code{eco}, \code{ecoNP}, \code{summary.ecoML}
+#' @references Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R
+#' Package for Ecological Inference in 2x2 Tables} Journal of Statistical
+#' Software, Vol. 42, No. 5, pp. 1-23. available at
+#' \url{http://imai.princeton.edu/software/eco.html}
+#'
+#' Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
+#' Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
+#' Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available
+#' at \url{http://imai.princeton.edu/research/eiall.html}
+#' @keywords models
+#' @examples
+#'
+#'
+#' ## load the census data
+#' data(census)
+#'
+#' ## NOTE: convergence has not been properly assessed for the following
+#' ## examples. See Imai, Lu and Strauss (2006) for more complete analyses.
+#' ## In the first example below, in the interest of time, only part of the
+#' ## data set is analyzed and the convergence requirement is less stringent
+#' ## than the default setting.
+#'
+#' ## In the second example, the program is arbitrarily halted 100 iterations
+#' ## into the simulation, before convergence.
+#'
+#' ## load the Robinson's census data
+#' data(census)
+#'
+#' ## fit the parametric model with the default model specifications
+#' \dontrun{res <- ecoML(Y ~ X, data = census[1:100,], N=census[1:100,3],
+#' epsilon=10^(-6), verbose = TRUE)}
+#' ## summarize the results
+#' \dontrun{summary(res)}
+#'
+#' ## obtain out-of-sample prediction
+#' \dontrun{out <- predict(res, verbose = TRUE)}
+#' ## summarize the results
+#' \dontrun{summary(out)}
+#'
+#' ## fit the parametric model with some individual
+#' ## level data using the default prior specification
+#' surv <- 1:600
+#' \dontrun{res1 <- ecoML(Y ~ X, context = TRUE, data = census[-surv,],
+#' supplement = census[surv,c(4:5,1)], maxit=100, verbose = TRUE)}
+#' ## summarize the results
+#' \dontrun{summary(res1)}
+#'
+#'
ecoML <- function(formula, data = parent.frame(), N=NULL, supplement = NULL,
theta.start = c(0,0,1,1,0), fix.rho = FALSE,
context = FALSE, sem = TRUE, epsilon=10^(-6),
diff --git a/R/forgnlit30.R b/R/forgnlit30.R
new file mode 100644
index 0000000..a7e2ad8
--- /dev/null
+++ b/R/forgnlit30.R
@@ -0,0 +1,26 @@
+
+
+#' Foreign-born literacy in 1930
+#'
+#' This data set contains, on a state level, the proportion of white residents
+#' ten years and older who are foreign born, and the proportion of those
+#' residents who are literate. Data come from the 1930 census and were first
+#' analyzed by Robinson (1950).
+#'
+#'
+#' @name forgnlit30
+#' @docType data
+#' @format A data frame containing 5 variables and 48 observations
+#' \tabular{lll}{ X \tab numeric \tab proportion of the white population at
+#' least 10 years of age that is foreign born \cr Y \tab numeric \tab
+#' proportion of the white population at least 10 years of age that is
+#' illiterate \cr W1 \tab numeric \tab proportion of the foreign-born white
+#' population at least 10 years of age that is illiterate \cr W2 \tab numeric
+#' \tab proportion of the native-born white population at least 10 years of age
+#' that is illiterate \cr ICPSR \tab numeric \tab the ICPSR state code }
+#' @references Robinson, W.S. (1950). ``Ecological Correlations and the
+#' Behavior of Individuals.'' \emph{American Sociological Review}, vol. 15,
+#' pp.351-357.
+#' @keywords datasets
+NULL
+
diff --git a/R/forgnlit30c.R b/R/forgnlit30c.R
new file mode 100644
index 0000000..9bea139
--- /dev/null
+++ b/R/forgnlit30c.R
@@ -0,0 +1,31 @@
+
+
+
+#' Foreign-born literacy in 1930, County Level
+#'
+#' This data set contains, on a county level, the proportion of white residents
+#' ten years and older who are foreign born, and the proportion of those
+#' residents who are literate. Data come from the 1930 census and were first
+#' analyzed by Robinson (1950). Counties with fewer than 100 foreign born
+#' residents are dropped.
+#'
+#'
+#' @name forgnlit30c
+#' @docType data
+#' @format A data frame containing 6 variables and 1976 observations
+#' \tabular{lll}{ X \tab numeric \tab proportion of the white population at
+#' least 10 years of age that is foreign born \cr Y \tab numeric \tab
+#' proportion of the white population at least 10 years of age that is
+#' illiterate \cr W1 \tab numeric \tab proportion of the foreign-born white
+#' population at least 10 years of age that is illiterate \cr W2 \tab numeric
+#' \tab proportion of the native-born white population at least 10 years of age
+#' that is illiterate \cr state \tab numeric \tab the ICPSR state code \cr
+#' county \tab numeric \tab the ICPSR (within state) county code }
+#' @references Robinson, W.S. (1950). ``Ecological Correlations and the
+#' Behavior of Individuals.'' \emph{American Sociological Review}, vol. 15,
+#' pp.351-357.
+#' @keywords datasets
+NULL
+
+
+
diff --git a/R/housep88.R b/R/housep88.R
new file mode 100644
index 0000000..b5b0753
--- /dev/null
+++ b/R/housep88.R
@@ -0,0 +1,32 @@
+
+
+#' Electoral Results for the House and Presidential Races in 1988
+#'
+#' This data set contains, on a House district level, the percentage of the
+#' vote for the Democratic House candidate, the percentage of the vote for the
+#' Democratic presidential candidate (Dukakis), the number of voters who voted
+#' for a major party candidate in the presidential race, and the ratio of
+#' voters in the House race versus the number who cast a ballot for President.
+#' Eleven (11) uncontested races are not included. Dataset compiled and
+#' analyzed by Burden and Kimball (1998). Complete dataset and documentation
+#' available at ICPSR study number 1140.
+#'
+#'
+#' @name housep88
+#' @docType data
+#' @format A data frame containing 5 variables and 424 observations
+#' \tabular{lll}{ X \tab numeric \tab proportion voting for the Democrat in the
+#' presidential race \cr Y \tab numeric \tab proportion voting for the Democrat
+#' in the House race \cr N \tab numeric \tab number of major party voters in
+#' the presidential contest \cr HPCT \tab numeric \tab House election turnout
+#' divided by presidential election turnout (set to 1 if House turnout exceeds
+#' presidential turnout) \cr DIST \tab numeric \tab 4-digit ICPSR state and
+#' district code: first 2 digits for the state code, last two digits for the
+#' district number (e.g., 2106=IL 6th) }
+#' @references Burden, Barry C. and David C. Kimball (1998). ``A New Approach
+#' To Ticket-Splitting.'' The American Political Science Review. vol. 92, no.
+#' 3, pp. 533-544.
+#' @keywords datasets
+NULL
+
+
diff --git a/R/predict.eco.R b/R/predict.eco.R
index f0f9819..3b3acbc 100644
--- a/R/predict.eco.R
+++ b/R/predict.eco.R
@@ -1,3 +1,47 @@
+#' Out-of-Sample Posterior Prediction under the Parametric Bayesian Model for
+#' Ecological Inference in 2x2 Tables
+#'
+#' Obtains out-of-sample posterior predictions under the fitted parametric
+#' Bayesian model for ecological inference. \code{predict} method for class
+#' \code{eco} and \code{ecoX}.
+#'
+#' The posterior predictive values are computed using the Monte Carlo sample
+#' stored in the \code{eco} output (or other sample if \code{newdraw} is
+#' specified). Given each Monte Carlo sample of the parameters, we sample the
+#' vector-valued latent variable from the appropriate multivariate Normal
+#' distribution. Then, we apply the inverse logit transformation to obtain the
+#' predictive values of proportions, \eqn{W}. The computation may be slow
+#' (especially for the nonparametric model) if a large Monte Carlo sample of
+#' the model parameters is used. In either case, setting \code{verbose = TRUE}
+#' may be helpful in monitoring the progress of the code.
+#'
+#' @aliases predict.eco
+#' @param object An output object from \code{eco} or \code{ecoNP}.
+#' @param newdraw An optional list containing two matrices (or three
+#' dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
+#' and \eqn{\Sigma}. Those elements should be named as \code{mu} and
+#' \code{Sigma}, respectively. The default is the original MCMC draws stored in
+#' \code{object}.
+#' @param subset A scalar or numerical vector specifying the row number(s) of
+#' \code{mu} and \code{Sigma} in the output object from \code{eco}. If
+#' specified, the posterior draws of parameters for those rows are used for
+#' posterior prediction. The default is \code{NULL} where all the posterior
+#' draws are used.
+#' @param verbose logical. If \code{TRUE}, helpful messages along with a
+#' progress report on the Monte Carlo sampling from the posterior predictive
+#' distributions are printed on the screen. The default is \code{FALSE}.
+#' @param ... further arguments passed to or from other methods.
+#' @return \code{predict.eco} yields a matrix of class \code{predict.eco}
+#' containing the Monte Carlo sample from the posterior predictive distribution
+#' of inner cells of ecological tables. \code{summary.predict.eco} will
+#' summarize the output, and \code{print.summary.predict.eco} will print the
+#' summary.
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}
+#' @seealso \code{eco}, \code{predict.ecoNP}
+#' @keywords methods
predict.eco <- function(object, newdraw = NULL, subset = NULL,
verbose = FALSE, ...){
diff --git a/R/predict.ecoNP.R b/R/predict.ecoNP.R
index 32a77b6..e8edad6 100644
--- a/R/predict.ecoNP.R
+++ b/R/predict.ecoNP.R
@@ -1,3 +1,50 @@
+#' Out-of-Sample Posterior Prediction under the Nonparametric Bayesian Model
+#' for Ecological Inference in 2x2 Tables
+#'
+#' Obtains out-of-sample posterior predictions under the fitted nonparametric
+#' Bayesian model for ecological inference. \code{predict} method for class
+#' \code{ecoNP} and \code{ecoNPX}.
+#'
+#' The posterior predictive values are computed using the Monte Carlo sample
+#' stored in the \code{eco} or \code{ecoNP} output (or other sample if
+#' \code{newdraw} is specified). Given each Monte Carlo sample of the
+#' parameters, we sample the vector-valued latent variable from the appropriate
+#' multivariate Normal distribution. Then, we apply the inverse logit
+#' transformation to obtain the predictive values of proportions, \eqn{W}. The
+#' computation may be slow (especially for the nonparametric model) if a large
+#' Monte Carlo sample of the model parameters is used. In either case, setting
+#' \code{verbose = TRUE} may be helpful in monitoring the progress of the code.
+#'
+#' @aliases predict.ecoNP
+#' @param object An output object from \code{ecoNP}.
+#' @param newdraw An optional list containing two matrices (or three
+#' dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
+#' and \eqn{\Sigma}. Those elements should be named as \code{mu} and
+#' \code{Sigma}, respectively. The default is the original MCMC draws stored in
+#' \code{object}.
+#' @param subset A scalar or numerical vector specifying the row number(s) of
+#' \code{mu} and \code{Sigma} in the output object from \code{eco}. If
+#' specified, the posterior draws of parameters for those rows are used for
+#' posterior prediction. The default is \code{NULL} where all the posterior
+#' draws are used.
+#' @param obs An integer or vector of integers specifying the observation
+#' number(s) whose posterior draws will be used for predictions. The default is
+#' \code{NULL} where all the observations in the data set are selected.
+#' @param verbose logical. If \code{TRUE}, helpful messages along with a
+#' progress report on the Monte Carlo sampling from the posterior predictive
+#' distributions are printed on the screen. The default is \code{FALSE}.
+#' @param ... further arguments passed to or from other methods.
+#' @return \code{predict.eco} yields a matrix of class \code{predict.eco}
+#' containing the Monte Carlo sample from the posterior predictive distribution
+#' of inner cells of ecological tables. \code{summary.predict.eco} will
+#' summarize the output, and \code{print.summary.predict.eco} will print the
+#' summary.
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}
+#' @seealso \code{eco}, \code{ecoNP}, \code{summary.eco}, \code{summary.ecoNP}
+#' @keywords methods
predict.ecoNP <- function(object, newdraw = NULL, subset = NULL,
obs = NULL, verbose = FALSE, ...){
diff --git a/R/predict.ecoNPX.R b/R/predict.ecoNPX.R
index adf64ad..e0b594b 100644
--- a/R/predict.ecoNPX.R
+++ b/R/predict.ecoNPX.R
@@ -1,3 +1,53 @@
+#' Out-of-Sample Posterior Prediction under the Nonparametric Bayesian Model
+#' for Ecological Inference in 2x2 Tables
+#'
+#' Obtains out-of-sample posterior predictions under the fitted nonparametric
+#' Bayesian model for ecological inference. \code{predict} method for class
+#' \code{ecoNP} and \code{ecoNPX}.
+#'
+#' The posterior predictive values are computed using the Monte Carlo sample
+#' stored in the \code{eco} or \code{ecoNP} output (or other sample if
+#' \code{newdraw} is specified). Given each Monte Carlo sample of the
+#' parameters, we sample the vector-valued latent variable from the appropriate
+#' multivariate Normal distribution. Then, we apply the inverse logit
+#' transformation to obtain the predictive values of proportions, \eqn{W}. The
+#' computation may be slow (especially for the nonparametric model) if a large
+#' Monte Carlo sample of the model parameters is used. In either case, setting
+#' \code{verbose = TRUE} may be helpful in monitoring the progress of the code.
+#'
+#' @aliases predict.ecoNPX
+#' @param object An output object from \code{ecoNP}.
+#' @param newdraw An optional list containing two matrices (or three
+#' dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
+#' and \eqn{\Sigma}. Those elements should be named as \code{mu} and
+#' \code{Sigma}, respectively. The default is the original MCMC draws stored in
+#' \code{object}.
+#' @param subset A scalar or numerical vector specifying the row number(s) of
+#' \code{mu} and \code{Sigma} in the output object from \code{eco}. If
+#' specified, the posterior draws of parameters for those rows are used for
+#' posterior prediction. The default is \code{NULL} where all the posterior
+#' draws are used.
+#' @param obs An integer or vector of integers specifying the observation
+#' number(s) whose posterior draws will be used for predictions. The default is
+#' \code{NULL} where all the observations in the data set are selected.
+#' @param cond logical. If \code{TRUE}, then the conditional prediction will
+#' be made for the parametric model with contextual effects. The default is
+#' \code{FALSE}.
+#' @param verbose logical. If \code{TRUE}, helpful messages along with a
+#' progress report on the Monte Carlo sampling from the posterior predictive
+#' distributions are printed on the screen. The default is \code{FALSE}.
+#' @param ... further arguments passed to or from other methods.
+#' @return \code{predict.eco} yields a matrix of class \code{predict.eco}
+#' containing the Monte Carlo sample from the posterior predictive distribution
+#' of inner cells of ecological tables. \code{summary.predict.eco} will
+#' summarize the output, and \code{print.summary.predict.eco} will print the
+#' summary.
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}
+#' @seealso \code{eco}, \code{ecoNP}, \code{summary.eco}, \code{summary.ecoNP}
+#' @keywords methods
predict.ecoNPX <- function(object, newdraw = NULL, subset = NULL,
obs = NULL, cond = FALSE, verbose = FALSE, ...){
diff --git a/R/predict.ecoX.R b/R/predict.ecoX.R
index fbe2292..da4cd38 100644
--- a/R/predict.ecoX.R
+++ b/R/predict.ecoX.R
@@ -1,3 +1,53 @@
+#' Out-of-Sample Posterior Prediction under the Parametric Bayesian Model for
+#' Ecological Inference in 2x2 Tables
+#'
+#' Obtains out-of-sample posterior predictions under the fitted parametric
+#' Bayesian model for ecological inference. \code{predict} method for class
+#' \code{eco} and \code{ecoX}.
+#'
+#' The posterior predictive values are computed using the Monte Carlo sample
+#' stored in the \code{eco} output (or other sample if \code{newdraw} is
+#' specified). Given each Monte Carlo sample of the parameters, we sample the
+#' vector-valued latent variable from the appropriate multivariate Normal
+#' distribution. Then, we apply the inverse logit transformation to obtain the
+#' predictive values of proportions, \eqn{W}. The computation may be slow
+#' (especially for the nonparametric model) if a large Monte Carlo sample of
+#' the model parameters is used. In either case, setting \code{verbose = TRUE}
+#' may be helpful in monitoring the progress of the code.
+#'
+#' @aliases predict.ecoX
+#' @param object An output object from \code{eco} or \code{ecoNP}.
+#' @param newdraw An optional list containing two matrices (or three
+#' dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
+#' and \eqn{\Sigma}. Those elements should be named as \code{mu} and
+#' \code{Sigma}, respectively. The default is the original MCMC draws stored in
+#' \code{object}.
+#' @param newdata An optional data frame containing a new data set for which
+#' posterior predictions will be made. The new data set must have the same
+#' variable names as those in the original data.
+#' @param subset A scalar or numerical vector specifying the row number(s) of
+#' \code{mu} and \code{Sigma} in the output object from \code{eco}. If
+#' specified, the posterior draws of parameters for those rows are used for
+#' posterior prediction. The default is \code{NULL} where all the posterior
+#' draws are used.
+#' @param cond logical. If \code{TRUE}, then the conditional prediction will
+#' be made for the parametric model with contextual effects. The default is
+#' \code{FALSE}.
+#' @param verbose logical. If \code{TRUE}, helpful messages along with a
+#' progress report on the Monte Carlo sampling from the posterior predictive
+#' distributions are printed on the screen. The default is \code{FALSE}.
+#' @param ... further arguments passed to or from other methods.
+#' @return \code{predict.eco} yields a matrix of class \code{predict.eco}
+#' containing the Monte Carlo sample from the posterior predictive distribution
+#' of inner cells of ecological tables. \code{summary.predict.eco} will
+#' summarize the output, and \code{print.summary.predict.eco} will print the
+#' summary.
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}
+#' @seealso \code{eco}, \code{predict.ecoNP}
+#' @keywords methods
predict.ecoX <- function(object, newdraw = NULL, subset = NULL,
newdata = NULL, cond = FALSE, verbose = FALSE, ...){
diff --git a/R/print.summary.eco.R b/R/print.summary.eco.R
index ceddccc..ad0fb65 100644
--- a/R/print.summary.eco.R
+++ b/R/print.summary.eco.R
@@ -1,3 +1,34 @@
+#' Print the Summary of the Results for the Bayesian Parametric Model for Ecological
+#' Inference in 2x2 Tables
+#'
+#' \code{summary} method for class \code{eco}.
+#'
+#'
+#' @aliases print.summary.eco
+#' @param x An object of class \code{summary.eco}.
+#' @param digits the number of significant digits to use when printing.
+#' @param ... further arguments passed to or from other methods.
+#' @return \code{summary.eco} yields an object of class \code{summary.eco}
+#' containing the following elements:
+#' \item{call}{The call from \code{eco}.}
+#' \item{n.obs}{The number of units.}
+#' \item{n.draws}{The number of Monte Carlo samples.}
+#' \item{agg.table}{Aggregate posterior estimates of the marginal
+#' means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If
+#' \code{param = TRUE}, the following elements are also included:
+#' \item{param.table}{Posterior estimates of model parameters: population mean
+#' estimates of \eqn{W_1} and \eqn{W_2} and their logit transformations.} If
+#' \code{units = TRUE}, the following elements are also included:
+#' \item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
+#' \item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}
+#'
+#' This object can be printed by \code{print.summary.eco}
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}
+#' @seealso \code{eco}, \code{predict.eco}
+#' @keywords methods
print.summary.eco <- function(x, digits=max(3, getOption("digits")-3), ...) {
cat("\nCall: ")
cat(paste(deparse(x$call), sep="\n", collapse="\n"))
diff --git a/R/print.summary.ecoML.R b/R/print.summary.ecoML.R
index c1ba4ae..b7890a8 100644
--- a/R/print.summary.ecoML.R
+++ b/R/print.summary.ecoML.R
@@ -1,3 +1,52 @@
+## for simplicity, this summary function only reports parameters related to W_1 and W_2
+
+#' Print the Summary of the Results for the Maximum Likelihood Parametric Model for
+#' Ecological Inference in 2x2 Tables
+#'
+#' \code{summary} method for class \code{eco}.
+#'
+#'
+#' @aliases print.summary.ecoML
+#' @param x An object of class \code{summary.ecoML}.
+#' @param digits the number of significant digits to use when printing.
+#' @param ... further arguments passed to or from other methods.
+#' @return \code{summary.eco} yields an object of class \code{summary.eco}
+#' containing the following elements:
+#' \item{call}{The call from \code{eco}.}
+#' \item{sem}{Whether the SEM algorithm was executed, as specified by the user
+#' upon calling \code{ecoML}.}
+#' \item{fix.rho}{Whether the correlation parameter was fixed or allowed to vary,
+#' as specified by the user upon calling \code{ecoML}.}
+#' \item{epsilon}{The convergence threshold specified by the
+#' user upon calling \code{ecoML}.}
+#' \item{n.obs}{The number of units.}
+#' \item{iters.em}{The number of iterations the EM algorithm cycled through before
+#' convergence or reaching the maximum number of iterations allowed.}
+#' \item{iters.sem}{The number of iterations the SEM algorithm cycled through
+#' before convergence or reaching the maximum number of iterations allowed.}
+#' \item{loglik}{The final observed log-likelihood.}
+#' \item{rho}{A matrix of \code{iters.em} rows specifying the correlation parameters
+#' at each iteration of the EM algorithm. The number of columns depends on how many
+#' correlation parameters exist in the model. Column order is the same as the order of the
+#' parameters in \code{param.table}.}
+#' \item{param.table}{Final estimates of the parameter values for the model.
+#' Excludes parameters fixed by the user upon calling \code{ecoML}.
+#' See \code{ecoML} documentation for order of parameters.}
+#' \item{agg.table}{Aggregate estimates of the marginal means of \eqn{W_1} and \eqn{W_2}}
+#' \item{agg.wtable}{Aggregate estimates of the marginal means of \eqn{W_1} and \eqn{W_2}
+#' using \eqn{X} and \eqn{N} as weights.} If \code{units = TRUE}, the following elements
+#' are also included:
+#' \item{W.table}{Unit-level estimates for \eqn{W_1} and \eqn{W_2}.}
+#'
+#' This object can be printed by \code{print.summary.ecoML}
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}; Aaron Strauss,
+#' Department of Politics, Princeton University,
+#' \email{abstraus@@Princeton.Edu}
+#' @seealso \code{ecoML}
+#' @keywords methods
print.summary.ecoML <- function(x, digits=max(3,
getOption("digits")-3), ...) {
diff --git a/R/print.summary.ecoNP.R b/R/print.summary.ecoNP.R
index 4adfeea..607a30a 100644
--- a/R/print.summary.ecoNP.R
+++ b/R/print.summary.ecoNP.R
@@ -1,3 +1,35 @@
+#' Print the Summary of the Results for the Bayesian Nonparametric Model for Ecological
+#' Inference in 2x2 Tables
+#'
+#' \code{summary} method for class \code{ecoNP}.
+#'
+#'
+#' @aliases print.summary.ecoNP
+#' @param x An object of class \code{summary.ecoNP}.
+#' @param digits the number of significant digits to use when printing.
+#' @param ... further arguments passed to or from other methods.
+#' @return \code{summary.ecoNP} yields an object of class \code{summary.ecoNP}
+#' containing the following elements:
+#' \item{call}{The call from \code{ecoNP}.}
+#' \item{n.obs}{The number of units.}
+#' \item{n.draws}{The number of Monte Carlo samples.}
+#' \item{agg.table}{Aggregate posterior estimates of the marginal
+#' means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If
+#' \code{param = TRUE}, the following elements are also included:
+#' \item{param.table}{Posterior estimates of model parameters: population mean
+#' estimates of \eqn{W_1} and \eqn{W_2}. If \code{subset} is specified, only a
+#' subset of the population parameters are included.} If \code{units = TRUE},
+#' the following elements are also included:
+#' \item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
+#' \item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}
+#'
+#' This object can be printed by \code{print.summary.ecoNP}
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}
+#' @seealso \code{ecoNP}, \code{predict.eco}
+#' @keywords methods
print.summary.ecoNP <- function(x, digits=max(3, getOption("digits")-3), ...)
{
cat("\nCall: ")
diff --git a/R/reg.R b/R/reg.R
new file mode 100644
index 0000000..ffa2a50
--- /dev/null
+++ b/R/reg.R
@@ -0,0 +1,25 @@
+
+
+#' Voter Registration in US Southern States
+#'
+#' This data set contains the racial composition, the registration rate, the
+#' number of eligible voters as well as the actual observed racial registration
+#' rates for every county in four US southern states: Florida, Louisiana, North
+#' Carolina, and South Carolina.
+#'
+#'
+#' @name reg
+#' @docType data
+#' @format A data frame containing 5 variables and 275 observations
+#' \tabular{lll}{ X \tab numeric \tab the fraction of Black voters \cr Y \tab
+#' numeric \tab the fraction of voters who registered themselves\cr N \tab
+#' numeric \tab the total number of voters in each county \cr W1 \tab numeric
+#' \tab the actual fraction of Black voters who registered themselves \cr W2
+#' \tab numeric \tab the actual fraction of White voters who registered
+#' themselves }
+#' @references King, G. (1997). \dQuote{A Solution to the Ecological Inference
+#' Problem: Reconstructing Individual Behavior from Aggregate Data}. Princeton
+#' University Press, Princeton, NJ.
+#' @keywords datasets
+NULL
+
diff --git a/R/summary.eco.R b/R/summary.eco.R
index 90aeca7..2a68d00 100644
--- a/R/summary.eco.R
+++ b/R/summary.eco.R
@@ -1,3 +1,45 @@
+#' Summarizing the Results for the Bayesian Parametric Model for Ecological
+#' Inference in 2x2 Tables
+#'
+#' \code{summary} method for class \code{eco}.
+#'
+#'
+#' @aliases summary.eco print.eco
+#' @param object An output object from \code{eco}.
+#' @param CI A vector of lower and upper bounds for the Bayesian credible
+#' intervals used to summarize the results. The default is the equal tail 95
+#' percent credible interval.
+#' @param param Logical. If \code{TRUE}, the posterior estimates of the
+#' population parameters will be provided. The default value is \code{TRUE}.
+#' @param units Logical. If \code{TRUE}, the in-sample predictions for each
+#' unit or for a subset of units will be provided. The default value is
+#' \code{FALSE}.
+#' @param subset A numeric vector indicating the subset of the units whose
+#' in-sample predictions are to be provided when \code{units} is \code{TRUE}. The
+#' default value is \code{NULL} where the in-sample predictions for each unit
+#' will be provided.
+#' @param ... further arguments passed to or from other methods.
+#' @return \code{summary.eco} yields an object of class \code{summary.eco}
+#' containing the following elements:
+#' \item{call}{The call from \code{eco}.}
+#' \item{n.obs}{The number of units.}
+#' \item{n.draws}{The number of Monte Carlo samples.}
+#' \item{agg.table}{Aggregate posterior estimates of the marginal
+#' means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If
+#' \code{param = TRUE}, the following elements are also included:
+#' \item{param.table}{Posterior estimates of model parameters: population mean
+#' estimates of \eqn{W_1} and \eqn{W_2} and their logit transformations.} If
+#' \code{units = TRUE}, the following elements are also included:
+#' \item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
+#' \item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}
+#'
+#' This object can be printed by \code{print.summary.eco}
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}
+#' @seealso \code{eco}, \code{predict.eco}
+#' @keywords methods
summary.eco <- function(object, CI = c(2.5, 97.5), param = TRUE,
units = FALSE, subset = NULL,...) {
diff --git a/R/summary.ecoML.R b/R/summary.ecoML.R
index c5d2ace..c1e0505 100644
--- a/R/summary.ecoML.R
+++ b/R/summary.ecoML.R
@@ -1,4 +1,63 @@
-##for simlicity, this summary function only reports parameters related to W_1 and W_2
+## for simplicity, this summary function only reports parameters related to W_1 and W_2
+
+#' Summarizing the Results for the Maximum Likelihood Parametric Model for
+#' Ecological Inference in 2x2 Tables
+#'
+#' \code{summary} method for class \code{ecoML}.
+#'
+#'
+#' @aliases summary.ecoML
+#' @param object An output object from \code{eco}.
+#' @param CI A vector of lower and upper bounds for the Bayesian credible
+#' intervals used to summarize the results. The default is the equal tail 95
+#' percent credible interval.
+#' @param param Ignored.
+#' @param subset A numeric vector indicating the subset of the units whose
+#' in-sample predictions are to be provided when \code{units} is \code{TRUE}. The
+#' default value is \code{NULL} where the in-sample predictions for each unit
+#' will be provided.
+#' @param units Logical. If \code{TRUE}, the in-sample predictions for each
+#' unit or for a subset of units will be provided. The default value is
+#' \code{FALSE}.
+#' @param ... further arguments passed to or from other methods.
+#' @return \code{summary.ecoML} yields an object of class \code{summary.ecoML}
+#' containing the following elements:
+#' \item{call}{The call from \code{ecoML}.}
+#' \item{sem}{Whether the SEM algorithm was executed, as specified by the user
+#' upon calling \code{ecoML}.}
+#' \item{fix.rho}{Whether the correlation parameter was fixed or allowed to
+#' vary, as specified by the user upon calling \code{ecoML}.}
+#' \item{epsilon}{The convergence threshold specified by the user upon
+#' calling \code{ecoML}.}
+#' \item{n.obs}{The number of units.}
+#' \item{iters.em}{The number of iterations the EM algorithm cycled through before
+#' convergence or reaching the maximum number of iterations allowed.}
+#' \item{iters.sem}{The number of iterations the SEM algorithm cycled through
+#' before convergence or reaching the maximum number of iterations allowed.}
+#' \item{loglik}{The final observed log-likelihood.}
+#' \item{rho}{A matrix of \code{iters.em} rows specifying the correlation parameters at each iteration
+#' of the EM algorithm. The number of columns depends on how many correlation
+#' parameters exist in the model. Column order is the same as the order of the
+#' parameters in \code{param.table}.}
+#' \item{param.table}{Final estimates of the parameter values for the model.
+#' Excludes parameters fixed by the user upon calling \code{ecoML}.
+#' See \code{ecoML} documentation for order of parameters.}
+#' \item{agg.table}{Aggregate estimates of the marginal means of
+#' \eqn{W_1} and \eqn{W_2}}
+#' \item{agg.wtable}{Aggregate estimates of the marginal means
+#' of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.}
+#' If \code{units = TRUE}, the following elements are also included:
+#' \item{W.table}{Unit-level estimates for \eqn{W_1} and \eqn{W_2}.}
+#'
+#' This object can be printed by \code{print.summary.ecoML}
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}; Aaron Strauss,
+#' Department of Politics, Princeton University,
+#' \email{abstraus@@Princeton.Edu}
+#' @seealso \code{ecoML}
+#' @keywords methods
summary.ecoML <- function(object, CI = c(2.5, 97.5), param = TRUE, units = FALSE, subset = NULL, ...) {
diff --git a/R/summary.ecoNP.R b/R/summary.ecoNP.R
index 90d962d..9a10ba5 100644
--- a/R/summary.ecoNP.R
+++ b/R/summary.ecoNP.R
@@ -1,3 +1,46 @@
+#' Summarizing the Results for the Bayesian Nonparametric Model for Ecological
+#' Inference in 2x2 Tables
+#'
+#' \code{summary} method for class \code{ecoNP}.
+#'
+#'
+#' @aliases summary.ecoNP
+#' @param object An output object from \code{ecoNP}.
+#' @param CI A vector of lower and upper bounds for the Bayesian credible
+#' intervals used to summarize the results. The default is the equal tail 95
+#' percent credible interval.
+#' @param param Logical. If \code{TRUE}, the posterior estimates of the
+#' population parameters will be provided. The default value is \code{FALSE}.
+#' @param units Logical. If \code{TRUE}, the in-sample predictions for each
+#' unit or for a subset of units will be provided. The default value is
+#' \code{FALSE}.
+#' @param subset A numeric vector indicating the subset of the units whose
+#' in-sample predictions are to be provided when \code{units} is \code{TRUE}. The
+#' default value is \code{NULL} where the in-sample predictions for each unit
+#' will be provided.
+#' @param ... further arguments passed to or from other methods.
+#' @return \code{summary.ecoNP} yields an object of class \code{summary.ecoNP}
+#' containing the following elements:
+#' \item{call}{The call from \code{ecoNP}.}
+#' \item{n.obs}{The number of units.}
+#' \item{n.draws}{The number of Monte Carlo samples.}
+#' \item{agg.table}{Aggregate posterior estimates of the marginal
+#' means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If
+#' \code{param = TRUE}, the following elements are also included:
+#' \item{param.table}{Posterior estimates of model parameters: population mean
+#' estimates of \eqn{W_1} and \eqn{W_2}. If \code{subset} is specified, only a
+#' subset of the population parameters are included.} If \code{units = TRUE},
+#' the following elements are also included:
+#' \item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
+#' \item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}
+#'
+#' This object can be printed by \code{print.summary.ecoNP}
+#' @author Kosuke Imai, Department of Politics, Princeton University,
+#' \email{kimai@@Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+#' Center for Promoting Research Involving Innovative Statistical Methodology
+#' (PRIISM), New York University \email{ying.lu@@nyu.Edu}
+#' @seealso \code{ecoNP}, \code{predict.eco}
+#' @keywords methods
summary.ecoNP <- function(object, CI=c(2.5, 97.5), param=FALSE, units=FALSE, subset=NULL,...) {
diff --git a/R/wallace.R b/R/wallace.R
new file mode 100644
index 0000000..23d9126
--- /dev/null
+++ b/R/wallace.R
@@ -0,0 +1,30 @@
+
+
+#' Black voting rates for Wallace for President, 1968
+#'
+#' This data set contains, on a county level, the proportion of county
+#' residents who are Black and the proportion of presidential votes cast for
+#' Wallace. Demographic data is based on the 1960 census. Presidential returns
+#' are from ICPSR study 13. County data from 10 southern states (Alabama,
+#' Arkansas, Georgia, Florida, Louisiana, Mississippi, North Carolina, South
+#' Carolina, Tennessee, Texas) are included. (Virginia is excluded due to the
+#' difficulty of matching counties between the datasets.) This data is
+#' analyzed in Wasserman and Segal (1973).
+#'
+#'
+#' @name wallace
+#' @docType data
+#' @format A data frame containing 3 variables and 1009 observations
+#' \tabular{lll}{
+#' X \tab numeric \tab proportion of the population that is Black \cr
+#' Y \tab numeric \tab proportion of presidential votes cast for Wallace \cr
+#' FIPS \tab numeric \tab the FIPS county code
+#' }
+#' @references Wasserman, Ira M. and David R. Segal (1973). ``Aggregation
+#' Effects in the Ecological Study of Presidential Voting.'' American Journal
+#' of Political Science. vol. 17, pp. 177-81.
+#' @keywords datasets
+NULL
+
+
+
diff --git a/man/Qfun.Rd b/man/Qfun.Rd
index 498b804..d7b7ebc 100644
--- a/man/Qfun.Rd
+++ b/man/Qfun.Rd
@@ -1,45 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Qfun.R
\name{Qfun}
-
\alias{Qfun}
-
-\title{Fitting the Parametric Bayesian Model of Ecological Inference in
- 2x2 Tables}
-
-\description{
- \code{Qfun} returns the complete log-likelihood
-that is used to calculate the fraction of missing information.}
-
+\title{Fitting the Parametric Bayesian Model of Ecological Inference in 2x2 Tables}
\usage{
Qfun(theta, suff.stat, n)
}
-
\arguments{
- \item{theta}{A vector that contains the MLE \eqn{E(W_1)},\eqn{E(W_2)},
- \eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. Typically it is
-the element \code{theta.em} of an object of class \code{ecoML}.}
- \item{suff.stat}{A vector of sufficient statistics of \eqn{E(W_1)},
+\item{theta}{A vector that contains the MLE \eqn{E(W_1)},\eqn{E(W_2)},
+\eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. Typically it is the
+element \code{theta.em} of an object of class \code{ecoML}.}
+
+\item{suff.stat}{A vector of sufficient statistics of \eqn{E(W_1)},
\eqn{E(W_2)}, \eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}.}
+
\item{n}{An integer representing the sample size.}
}
+\description{
+\code{Qfun} returns the complete log-likelihood that is used to calculate
+the fraction of missing information.
+}
+\references{
+Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R
+Package for Ecological Inference in 2x2 Tables} Journal of Statistical
+Software, Vol. 42, No. 5, pp. 1-23. available at
+\url{http://imai.princeton.edu/software/eco.html}
+
+Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
+Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
+Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available
+at \url{http://imai.princeton.edu/research/eiall.html}
+}
+\seealso{
+\code{ecoML}
+}
\author{
- Kosuke Imai, Department of Politics, Princeton University,
- \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu};
- Ying Lu, Center for Promoting Research Involving Innovative Statistical Methodology (PRIISM), New York University
- \email{ying.lu at nyu.Edu}
- Aaron Strauss, Department of Politics, Princeton University,
- \email{abstraus at Princeton.Edu}.
- }
-
- \references{ Imai, Kosuke, Ying Lu and Aaron Strauss. (2011).
- \dQuote{eco: R Package for Ecological Inference in 2x2 Tables}
- Journal of Statistical Software, Vol. 42, No. 5, pp. 1-23. available
- at \url{http://imai.princeton.edu/software/eco.html}
-
- Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
- Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
- Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available at
- \url{http://imai.princeton.edu/research/eiall.html}
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu} Aaron Strauss,
+Department of Politics, Princeton University,
+\email{abstraus at Princeton.Edu}.
}
-
-\seealso{\code{ecoML}}
\keyword{models}
diff --git a/man/census.Rd b/man/census.Rd
index 1c50f73..95517ca 100644
--- a/man/census.Rd
+++ b/man/census.Rd
@@ -1,39 +1,28 @@
-\name{census}
-
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/census.R
\docType{data}
-
+\name{census}
\alias{census}
-
-\title{Black Illiteracy Rates in 1910 US Census}
-
+\title{Black Illiteracy Rates in 1910 US Census}
+\format{A data frame containing 5 variables and 1040 observations
+\tabular{lll}{ X \tab numeric \tab the proportion of Black residents in each
+county\cr Y \tab numeric \tab the overall literacy rates in each county\cr N
+\tab numeric \tab the total number of residents in each county \cr W1 \tab
+numeric \tab the actual Black literacy rate \cr W2 \tab numeric \tab the
+actual White literacy rate }}
\description{
- This data set contains the proportion of the residents who are black,
- the proportion of those who can read, the total population as well as
- the actual black literacy rate and white literacy rate for 1040
- counties in the US. The dataset was originally analyzed by Robinson
- (1950) at the state level. King (1997) recoded the 1910 census at
- county level. The data set only includes those who are older than 10
- years of age.
-}
-
-\usage{data(census)}
-
-\format{A data frame containing 5 variables and 1040 observations
- \tabular{lll}{
- X \tab numeric \tab the proportion of Black residents in each county\cr
- Y \tab numeric \tab the overall literacy rates in each county\cr
- N \tab numeric \tab the total number of residents in each county \cr
- W1 \tab numeric \tab the actual Black literacy rate \cr
- W2 \tab numeric \tab the actual White literacy rate
- }
+This data set contains the proportion of the residents who are black, the
+proportion of those who can read, the total population as well as the actual
+black literacy rate and white literacy rate for 1040 counties in the US. The
+dataset was originally analyzed by Robinson (1950) at the state level. King
+(1997) recoded the 1910 census at county level. The data set only includes
+those who are older than 10 years of age.
}
\references{
- Robinson, W.S. (1950). ``Ecological Correlations and the Behavior
- of Individuals.'' \emph{American Sociological Review}, vol. 15,
- pp.351-357. \cr \cr
- King, G. (1997). \dQuote{A Solution to the Ecological
- Inference Problem: Reconstructing Individual Behavior from Aggregate
- Data}. Princeton University Press, Princeton, NJ.
+Robinson, W.S. (1950). ``Ecological Correlations and the
+Behavior of Individuals.'' \emph{American Sociological Review}, vol. 15,
+pp.351-357. \cr \cr King, G. (1997). \dQuote{A Solution to the Ecological
+Inference Problem: Reconstructing Individual Behavior from Aggregate Data}.
+Princeton University Press, Princeton, NJ.
}
-
\keyword{datasets}
diff --git a/man/eco.Rd b/man/eco.Rd
index 7f79d02..e7ccb2b 100644
--- a/man/eco.Rd
+++ b/man/eco.Rd
@@ -1,143 +1,150 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/eco.R
\name{eco}
-
\alias{eco}
-
-\title{Fitting the Parametric Bayesian Model of Ecological Inference in
- 2x2 Tables}
-
-\description{ \code{eco} is used to fit the parametric Bayesian model
- (based on a Normal/Inverse-Wishart prior) for ecological inference
- in \eqn{2 \times 2} tables via Markov chain Monte Carlo. It gives
- the in-sample predictions as well as the estimates of the model
- parameters. The model and algorithm are described in Imai, Lu and
- Strauss (2008, 2011). }
-
+\title{Fitting the Parametric Bayesian Model of Ecological Inference in 2x2 Tables}
\usage{
-eco(formula, data = parent.frame(), N = NULL, supplement = NULL,
- context = FALSE, mu0 = 0, tau0 = 2, nu0 = 4, S0 = 10,
- mu.start = 0, Sigma.start = 10, parameter = TRUE,
- grid = FALSE, n.draws = 5000, burnin = 0, thin = 0,
- verbose = FALSE)
+eco(formula, data = parent.frame(), N = NULL, supplement = NULL,
+ context = FALSE, mu0 = 0, tau0 = 2, nu0 = 4, S0 = 10,
+ mu.start = 0, Sigma.start = 10, parameter = TRUE, grid = FALSE,
+ n.draws = 5000, burnin = 0, thin = 0, verbose = FALSE)
}
-
\arguments{
- \item{formula}{A symbolic description of the model to be fit,
- specifying the column and row margins of \eqn{2 \times
- 2} ecological tables. \code{Y ~ X} specifies \code{Y} as the
- column margin (e.g., turnout) and \code{X} as the row margin
- (e.g., percent African-American). Details and specific
- examples are given below.
- }
- \item{data}{An optional data frame in which to interpret the variables
- in \code{formula}. The default is the environment in which
- \code{eco} is called.
- }
- \item{N}{An optional variable representing the size of the unit; e.g.,
- the total number of voters. \code{N} needs to be a vector of same length
- as \code{Y} and \code{X} or a scalar. }
- \item{supplement}{An optional matrix of supplemental data. The matrix
- has two columns, which contain additional individual-level data such
- as survey data for \eqn{W_1} and \eqn{W_2}, respectively. If
- \code{NULL}, no additional individual-level data are included in the
- model. The default is \code{NULL}.
- }
- \item{context}{Logical. If \code{TRUE}, the contextual effect is also
- modeled, that is to assume the row margin \eqn{X} and the unknown
- \eqn{W_1} and \eqn{W_2} are correlated. See Imai, Lu and Strauss
- (2008, 2011) for details. The default is \code{FALSE}.
- }
- \item{mu0}{A scalar or a numeric vector that specifies the prior mean
- for the mean parameter \eqn{\mu} for \eqn{(W_1,W_2)} (or for
- \eqn{(W_1, W_2, X)} if \code{context=TRUE}). When the input of
- \code{mu0} is a scalar, its value will be repeated to yield a vector of
- the length of \eqn{\mu}, otherwise, it needs to be a vector of same
- length as \eqn{\mu}. When \code{context=TRUE}, the length of \eqn{\mu}
- is 3, otherwise it is 2. The default is \code{0}.
- }
- \item{tau0}{A positive integer representing the scale parameter of the
- Normal-Inverse Wishart prior for the mean and variance parameter
- \eqn{(\mu, \Sigma)}. The default is \code{2}.}
- \item{nu0}{A positive integer representing the prior degrees of
- freedom of the Normal-Inverse Wishart prior for the mean and
- variance parameter \eqn{(\mu, \Sigma)}. The default is \code{4}.
- }
- \item{S0}{A positive scalar or a positive definite matrix that specifies
- the prior scale matrix of the Normal-Inverse Wishart prior for the
- mean and variance parameter \eqn{(\mu, \Sigma)} . If it is
- a scalar, then the prior scale matrix will be a diagonal matrix with
- the same dimensions as \eqn{\Sigma} and the diagonal elements all take
- value of \code{S0}, otherwise \code{S0} needs to have same dimensions as
- \eqn{\Sigma}. When \code{context=TRUE}, \eqn{\Sigma} is a
- \eqn{3 \times 3} matrix, otherwise, it is \eqn{2 \times 2}.
- The default is \code{10}.
- }
- \item{mu.start}{A scalar or a numeric vector that specifies the
- starting values of the mean parameter \eqn{\mu}.
- If it is a scalar, then its value will be repeated to
- yield a vector of the length of \eqn{\mu}, otherwise,
- it needs to be a vector of same length as \eqn{\mu}.
- When \code{context=FALSE}, the length of \eqn{\mu} is 2,
- otherwise it is 3. The default is \code{0}.
- }
- \item{Sigma.start}{A scalar or a positive definite matrix
- that specified the starting value of the variance matrix
- \eqn{\Sigma}. If it is a scalar, then the prior scale
- matrix will be a diagonal matrix with the same dimensions
- as \eqn{\Sigma} and the diagonal elements all take value
- of \code{S0}, otherwise \code{S0} needs to have same dimensions as
- \eqn{\Sigma}. When \code{context=TRUE}, \eqn{\Sigma} is a
- \eqn{3 \times 3} matrix, otherwise, it is \eqn{2 \times 2}.
- The default is \code{10}.
- }
- \item{parameter}{Logical. If \code{TRUE}, the Gibbs draws of the population
- parameters, \eqn{\mu} and \eqn{\Sigma}, are returned in addition to
- the in-sample predictions of the missing internal cells,
- \eqn{W}. The default is \code{TRUE}.
- }
- \item{grid}{Logical. If \code{TRUE}, the grid method is used to sample
- \eqn{W} in the Gibbs sampler. If \code{FALSE}, the Metropolis
- algorithm is used where candidate draws are sampled from the uniform
- distribution on the tomography line for each unit. Note that the
- grid method is significantly slower than the Metropolis algorithm.
- The default is \code{FALSE}.
- }
- \item{n.draws}{A positive integer. The number of MCMC draws.
- The default is \code{5000}.
- }
- \item{burnin}{A positive integer. The burnin interval for the Markov
- chain; i.e. the number of initial draws that should not be stored. The
- default is \code{0}.
- }
- \item{thin}{A positive integer. The thinning interval for the
- Markov chain; i.e. the number of Gibbs draws between the recorded
- values that are skipped. The default is \code{0}.
- }
- \item{verbose}{Logical. If \code{TRUE}, the progress of the Gibbs
- sampler is printed to the screen. The default is \code{FALSE}.
- }
+\item{formula}{A symbolic description of the model to be fit, specifying the
+column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~ X}
+specifies \code{Y} as the column margin (e.g., turnout) and \code{X} as the
+row margin (e.g., percent African-American). Details and specific examples
+are given below.}
+
+\item{data}{An optional data frame in which to interpret the variables in
+\code{formula}. The default is the environment in which \code{eco} is
+called.}
+
+\item{N}{An optional variable representing the size of the unit; e.g., the
+total number of voters. \code{N} needs to be a vector of same length as
+\code{Y} and \code{X} or a scalar.}
+
+\item{supplement}{An optional matrix of supplemental data. The matrix has
+two columns, which contain additional individual-level data such as survey
+data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no
+additional individual-level data are included in the model. The default is
+\code{NULL}.}
+
+\item{context}{Logical. If \code{TRUE}, the contextual effect is also
+modeled, that is to assume the row margin \eqn{X} and the unknown \eqn{W_1}
+and \eqn{W_2} are correlated. See Imai, Lu and Strauss (2008, 2011) for
+details. The default is \code{FALSE}.}
+
+\item{mu0}{A scalar or a numeric vector that specifies the prior mean for
+the mean parameter \eqn{\mu} for \eqn{(W_1,W_2)} (or for \eqn{(W_1, W_2, X)}
+if \code{context=TRUE}). When the input of \code{mu0} is a scalar, its value
+will be repeated to yield a vector of the length of \eqn{\mu}, otherwise, it
+needs to be a vector of same length as \eqn{\mu}. When \code{context=TRUE},
+the length of \eqn{\mu} is 3, otherwise it is 2. The default is \code{0}.}
+
+\item{tau0}{A positive integer representing the scale parameter of the
+Normal-Inverse Wishart prior for the mean and variance parameter \eqn{(\mu,
+\Sigma)}. The default is \code{2}.}
+
+\item{nu0}{A positive integer representing the prior degrees of freedom of
+the Normal-Inverse Wishart prior for the mean and variance parameter
+\eqn{(\mu, \Sigma)}. The default is \code{4}.}
+
+\item{S0}{A positive scalar or a positive definite matrix that specifies the
+prior scale matrix of the Normal-Inverse Wishart prior for the mean and
+variance parameter \eqn{(\mu, \Sigma)} . If it is a scalar, then the prior
+scale matrix will be a diagonal matrix with the same dimensions as
+\eqn{\Sigma} and the diagonal elements all take value of \code{S0},
+otherwise \code{S0} needs to have same dimensions as \eqn{\Sigma}. When
+\code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times 3} matrix, otherwise,
+it is \eqn{2 \times 2}. The default is \code{10}.}
+
+\item{mu.start}{A scalar or a numeric vector that specifies the starting
+values of the mean parameter \eqn{\mu}. If it is a scalar, then its value
+will be repeated to yield a vector of the length of \eqn{\mu}, otherwise, it
+needs to be a vector of same length as \eqn{\mu}. When
+\code{context=FALSE}, the length of \eqn{\mu} is 2, otherwise it is 3. The
+default is \code{0}.}
+
+\item{Sigma.start}{A scalar or a positive definite matrix that specifies the
+starting value of the variance matrix \eqn{\Sigma}. If it is a scalar, then
+the starting matrix will be a diagonal matrix with the same dimensions as
+\eqn{\Sigma} and the diagonal elements all take value of \code{Sigma.start},
+otherwise \code{Sigma.start} needs to have same dimensions as \eqn{\Sigma}. When
+\code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times 3} matrix, otherwise,
+it is \eqn{2 \times 2}. The default is \code{10}.}
+
+\item{parameter}{Logical. If \code{TRUE}, the Gibbs draws of the population
+parameters, \eqn{\mu} and \eqn{\Sigma}, are returned in addition to the
+in-sample predictions of the missing internal cells, \eqn{W}. The default is
+\code{TRUE}.}
+
+\item{grid}{Logical. If \code{TRUE}, the grid method is used to sample
+\eqn{W} in the Gibbs sampler. If \code{FALSE}, the Metropolis algorithm is
+used where candidate draws are sampled from the uniform distribution on the
+tomography line for each unit. Note that the grid method is significantly
+slower than the Metropolis algorithm. The default is \code{FALSE}.}
+
+\item{n.draws}{A positive integer. The number of MCMC draws. The default is
+\code{5000}.}
+
+\item{burnin}{A positive integer. The burnin interval for the Markov chain;
+i.e. the number of initial draws that should not be stored. The default is
+\code{0}.}
+
+\item{thin}{A positive integer. The thinning interval for the Markov chain;
+i.e. the number of Gibbs draws between the recorded values that are skipped.
+The default is \code{0}.}
+
+\item{verbose}{Logical. If \code{TRUE}, the progress of the Gibbs sampler is
+printed to the screen. The default is \code{FALSE}.}
+}
+\value{
+An object of class \code{eco} containing the following elements:
+\item{call}{The matched call.}
+\item{X}{The row margin, \eqn{X}.}
+\item{Y}{The column margin, \eqn{Y}.}
+\item{N}{The size of each table, \eqn{N}.}
+\item{burnin}{The number of initial burnin draws.}
+\item{thin}{The thinning interval.}
+\item{nu0}{The prior degrees of freedom.}
+\item{tau0}{The prior scale parameter.}
+\item{mu0}{The prior mean.}
+\item{S0}{The prior scale matrix.}
+\item{W}{A three dimensional array storing the posterior in-sample predictions of \eqn{W}.
+The first dimension indexes the Monte Carlo draws, the second dimension indexes the
+columns of the table, and the third dimension represents the observations.}
+\item{Wmin}{A numeric matrix storing the lower bounds of \eqn{W}.}
+\item{Wmax}{A numeric matrix storing the upper bounds of \eqn{W}.} The
+following additional elements are included in the output when
+\code{parameter = TRUE}.
+\item{mu}{The posterior draws of the population mean parameter, \eqn{\mu}.}
+\item{Sigma}{The posterior draws of the population variance matrix, \eqn{\Sigma}.}
+}
+\description{
+\code{eco} is used to fit the parametric Bayesian model (based on a
+Normal/Inverse-Wishart prior) for ecological inference in \eqn{2 \times 2}
+tables via Markov chain Monte Carlo. It gives the in-sample predictions as
+well as the estimates of the model parameters. The model and algorithm are
+described in Imai, Lu and Strauss (2008, 2011).
}
-
\details{
- An example of \eqn{2 \times 2} ecological table for racial voting is
- given below:
- \tabular{llccc}{
- \tab \tab black voters \tab white voters \tab \cr
- \tab vote \tab \eqn{W_{1i}} \tab \eqn{W_{2i}} \tab \eqn{Y_i} \cr
- \tab not vote \tab \eqn{1-W_{1i}} \tab \eqn{1-W_{2i}} \tab \eqn{1-Y_i} \cr
- \tab \tab \eqn{X_i} \tab \eqn{1-X_i} \tab
- }
- where \eqn{Y_i} and \eqn{X_i} represent the observed margins, and
- \eqn{W_1} and \eqn{W_2} are unknown variables. In this exmaple,
- \eqn{Y_i} is the turnout rate in the ith precint, \eqn{X_i} is the
- proproption of African American in the ith precinct. The unknowns
- \eqn{W_{1i}} an d\eqn{W_{2i}} are the black and white turnout,
- respectively. All variables are proportions and hence bounded
- between 0 and 1. For each \eqn{i}, the following deterministic
- relationship holds, \eqn{Y_i=X_i W_{1i}+(1-X_i)W_{2i}}.
+An example of \eqn{2 \times 2} ecological table for racial voting is given
+below: \tabular{llccc}{ \tab \tab black voters \tab white voters \tab \cr
+\tab vote \tab \eqn{W_{1i}} \tab \eqn{W_{2i}} \tab \eqn{Y_i} \cr \tab not
+vote \tab \eqn{1-W_{1i}} \tab \eqn{1-W_{2i}} \tab \eqn{1-Y_i} \cr \tab \tab
+\eqn{X_i} \tab \eqn{1-X_i} \tab } where \eqn{Y_i} and \eqn{X_i} represent
+the observed margins, and \eqn{W_1} and \eqn{W_2} are unknown variables. In
+this example, \eqn{Y_i} is the turnout rate in the ith precinct, \eqn{X_i} is
+the proportion of African Americans in the ith precinct. The unknowns
+\eqn{W_{1i}} and \eqn{W_{2i}} are the black and white turnout, respectively.
+All variables are proportions and hence bounded between 0 and 1. For each
+\eqn{i}, the following deterministic relationship holds, \eqn{Y_i=X_i
+W_{1i}+(1-X_i)W_{2i}}.
}
-
\examples{
+
## load the registration data
\dontrun{data(reg)
@@ -169,51 +176,26 @@ out1 <- predict(res1, verbose = TRUE)
## summarize the results
summary(out1)
}
-}
-\value{
- An object of class \code{eco} containing the following elements:
- \item{call}{The matched call.}
- \item{X}{The row margin, \eqn{X}.}
- \item{Y}{The column margin, \eqn{Y}.}
- \item{N}{The size of each table, \eqn{N}.}
- \item{burnin}{The number of initial burnin draws.}
- \item{thin}{The thinning interval.}
- \item{nu0}{The prior degrees of freedom.}
- \item{tau0}{The prior scale parameter.}
- \item{mu0}{The prior mean.}
- \item{S0}{The prior scale matrix.}
- \item{W}{A three dimensional array storing the posterior in-sample
- predictions of \eqn{W}. The first dimension indexes the Monte Carlo
- draws, the second dimension indexes the columns of the table, and the
- third dimension represents the observations.}
- \item{Wmin}{A numeric matrix storing the lower bounds of \eqn{W}.}
- \item{Wmax}{A numeric matrix storing the upper bounds of \eqn{W}.}
- The following additional elements are included in the output when
- \code{parameter = TRUE}.
- \item{mu}{The posterior draws of the population mean parameter,
- \eqn{\mu}.}
- \item{Sigma}{The posterior draws of the population variance matrix,
- \eqn{\Sigma}.}
}
-
+\references{
+Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R
+Package for Ecological Inference in 2x2 Tables} Journal of Statistical
+Software, Vol. 42, No. 5, pp. 1-23. available at
+\url{http://imai.princeton.edu/software/eco.html}
+
+Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
+Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
+Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available
+at \url{http://imai.princeton.edu/research/eiall.html}
+}
+\seealso{
+\code{ecoML}, \code{ecoNP}, \code{predict.eco}, \code{summary.eco}
+}
\author{
- Kosuke Imai, Department of Politics, Princeton University,
- \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu};
- Ying Lu,Center for Promoting Research Involving Innovative Statistical Methodology (PRIISM), New York University,
- \email{ying.lu at nyu.Edu}
- }
-
- \references{ Imai, Kosuke, Ying Lu and Aaron Strauss. (2011).
- \dQuote{eco: R Package for Ecological Inference in 2x2 Tables}
- Journal of Statistical Software, Vol. 42, No. 5, pp. 1-23. available
- at \url{http://imai.princeton.edu/software/eco.html}
-
- Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
- Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
- Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available at
- \url{http://imai.princeton.edu/research/eiall.html}
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying
+Lu, Center for Promoting Research Involving Innovative Statistical
+Methodology (PRIISM), New York University, \email{ying.lu at nyu.Edu}
}
-
-\seealso{\code{ecoML}, \code{ecoNP}, \code{predict.eco}, \code{summary.eco}}
\keyword{models}
diff --git a/man/ecoBD.Rd b/man/ecoBD.Rd
index 9cd387e..dadc98e 100644
--- a/man/ecoBD.Rd
+++ b/man/ecoBD.Rd
@@ -1,88 +1,96 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ecoBD.R
\name{ecoBD}
-
\alias{ecoBD}
-
\title{Calculating the Bounds for Ecological Inference in RxC Tables}
-
-\description{
- \code{ecoBD} is used to calculate the bounds for missing internal cells
- of \eqn{R \times C} ecological table. The data can be entered either
- in the form of counts or proportions.
-}
-
\usage{
ecoBD(formula, data = parent.frame(), N = NULL)
}
-
\arguments{
- \item{formula}{A symbolic description of ecological table to be used,
- specifying the column and row margins of \eqn{R \times
- C} ecological tables.
- Details and specific
- examples are given below.
- }
- \item{data}{An optional data frame in which to interpret the variables
- in \code{formula}. The default is the environment in which
- \code{ecoBD} is called.
- }
- \item{N}{An optional variable representing the size of the unit; e.g.,
- the total number of voters. If \code{formula} is entered as counts
- and the last row and/or column is omitted, this input is necessary.}
-}
+\item{formula}{A symbolic description of ecological table to be used,
+specifying the column and row margins of \eqn{R \times C} ecological tables.
+Details and specific examples are given below.}
-\details{
- The data may be entered either in the form of counts or proportions.
- If proportions are used, \code{formula} may omit the last row and/or
- column of tables, which can be calculated from the remaining margins.
- For example, \code{Y ~ X} specifies \code{Y} as the first column
- margin and \code{X} as the first row margin in \eqn{2 \times 2} tables.
- If counts are used, \code{formula} may omit the last row and/or column
- margin of the table only if \code{N} is supplied. In this example, the
- columns will be labeled as \code{X} and \code{not X}, and the rows
- will be labeled as \code{Y} and \code{not Y}.
+\item{data}{An optional data frame in which to interpret the variables in
+\code{formula}. The default is the environment in which \code{ecoBD} is
+called.}
- For larger tables, one can use \code{cbind()} and \code{+}. For
- example, \code{cbind(Y1,
- Y2, Y3) ~ X1 + X2 + X3 + X4)} specifies \eqn{3 \times 4} tables.
-
- An \eqn{R \times C} ecological table in the form of counts:
- \tabular{lcccc}{
- \eqn{n_{i11}} \tab \eqn{n_{i12}} \tab \dots \tab \eqn{n_{i1C}} \tab \eqn{n_{i1.}} \cr
- \eqn{n_{i21}} \tab \eqn{n_{i22}} \tab \dots \tab \eqn{n_{i2C}} \tab \eqn{n_{i2.}} \cr
- \dots \tab \dots \tab \dots \tab \dots \tab \dots\cr
- \eqn{n_{iR1}} \tab \eqn{n_{iR2}} \tab \dots \tab \eqn{n_{iRC}} \tab \eqn{n_{iR.}} \cr
- \eqn{n_{i.1}} \tab \eqn{n_{i.2}} \tab \dots \tab \eqn{n_{i.C}} \tab \eqn{N_i}
- }
- where \eqn{n_{nr.}} and \eqn{n_{i.c}} represent the observed margins,
- \eqn{N_i} represents the size of the table, and \eqn{n_{irc}} are
- unknown variables. Note that for each \eqn{i}, the following
- deterministic relationships hold; \eqn{n_{ir.} = \sum_{c=1}^C
- n_{irc}} for \eqn{r=1,\dots,R}, and \eqn{n_{i.c}=\sum_{r=1}^R n_{irc}}
- for \eqn{c=1,\dots,C}. Then, each of the unknown inner cells can be
- bounded in the following manner,
- \deqn{\max(0, n_{ir.}+n_{i.c}-N_i) \le n_{irc} \le \min(n_{ir.}, n_{i.c}).}
- If the size of tables, \code{N}, is provided,
-
- An \eqn{R \times C} ecological table in the form of proportions:
- \tabular{lcccc}{
- \eqn{W_{i11}} \tab \eqn{W_{i12}} \tab \dots \tab \eqn{W_{i1C}} \tab \eqn{Y_{i1}} \cr
- \eqn{W_{i21}} \tab \eqn{W_{i22}} \tab \dots \tab \eqn{W_{i2C}} \tab \eqn{Y_{i2}} \cr
- \dots \tab \dots \tab \dots \tab \dots \tab \dots \cr
- \eqn{W_{iR1}} \tab \eqn{W_{iR2}} \tab \dots \tab \eqn{W_{iRC}} \tab \eqn{Y_{iR}} \cr
- \eqn{X_{i1}} \tab \eqn{X_{i2}} \tab \dots \tab \eqn{X_{iC}} \tab
- }
- where \eqn{Y_{ir}} and \eqn{X_{ic}} represent the observed margins,
- and \eqn{W_{irc}} are unknown variables. Note that for each \eqn{i},
- the following deterministic relationships hold; \eqn{Y_{ir} =
- \sum_{c=1}^C X_{ic} W_{irc}} for \eqn{r=1,\dots,R}, and \eqn{\sum_{r=1}^R
- W_{irc}=1} for \eqn{c=1,\dots,C}. Then, each of
- the inner cells of the table can be bounded in the following manner,
- \deqn{\max(0, (X_{ic} + Y_{ir}-1)/X_{ic}) \le W_{irc}
- \le \min(1, Y_{ir}/X_{ir}).}
+\item{N}{An optional variable representing the size of the unit; e.g., the
+total number of voters. If \code{formula} is entered as counts and the last
+row and/or column is omitted, this input is necessary.}
+}
+\value{
+An object of class \code{ecoBD} containing the following elements
+(When three dimensional arrays are used, the first dimension indexes the
+observations, the second dimension indexes the row numbers, and the third
+dimension indexes the column numbers):
+\item{call}{The matched call.}
+\item{X}{A matrix of the observed row margin, \eqn{X}.}
+\item{Y}{A matrix of the observed column margin, \eqn{Y}.}
+\item{N}{A vector of the size of ecological tables, \eqn{N}.}
+\item{aggWmin}{A three dimensional array of
+aggregate lower bounds for proportions.}
+\item{aggWmax}{A three dimensional array of aggregate upper bounds for proportions.}
+\item{Wmin}{A three dimensional array of lower bounds for proportions.}
+\item{Wmax}{A three dimensional array of upper bounds for proportions.}
+\item{Nmin}{A three dimensional array of lower bounds for counts.}
+\item{Nmax}{A three dimensional array of upper bounds for counts.} The object
+can be printed through \code{print.ecoBD}.
+}
+\description{
+\code{ecoBD} is used to calculate the bounds for missing internal cells of
+\eqn{R \times C} ecological table. The data can be entered either in the
+form of counts or proportions.
}
+\details{
+The data may be entered either in the form of counts or proportions. If
+proportions are used, \code{formula} may omit the last row and/or column of
+tables, which can be calculated from the remaining margins. For example,
+\code{Y ~ X} specifies \code{Y} as the first column margin and \code{X} as
+the first row margin in \eqn{2 \times 2} tables. If counts are used,
+\code{formula} may omit the last row and/or column margin of the table only
+if \code{N} is supplied. In this example, the columns will be labeled as
+\code{X} and \code{not X}, and the rows will be labeled as \code{Y} and
+\code{not Y}.
+
+For larger tables, one can use \code{cbind()} and \code{+}. For example,
+\code{cbind(Y1, Y2, Y3) ~ X1 + X2 + X3 + X4} specifies \eqn{3 \times 4}
+tables.
+
+An \eqn{R \times C} ecological table in the form of counts: \tabular{lcccc}{
+\eqn{n_{i11}} \tab \eqn{n_{i12}} \tab \dots{} \tab \eqn{n_{i1C}} \tab
+\eqn{n_{i1.}} \cr \eqn{n_{i21}} \tab \eqn{n_{i22}} \tab \dots{} \tab
+\eqn{n_{i2C}} \tab \eqn{n_{i2.}} \cr \dots{} \tab \dots{} \tab \dots{} \tab
+\dots{} \tab \dots{}\cr \eqn{n_{iR1}} \tab \eqn{n_{iR2}} \tab \dots{} \tab
+\eqn{n_{iRC}} \tab \eqn{n_{iR.}} \cr \eqn{n_{i.1}} \tab \eqn{n_{i.2}} \tab
+\dots{} \tab \eqn{n_{i.C}} \tab \eqn{N_i} } where \eqn{n_{ir.}} and
+\eqn{n_{i.c}} represent the observed margins, \eqn{N_i} represents the size
+of the table, and \eqn{n_{irc}} are unknown variables. Note that for each
+\eqn{i}, the following deterministic relationships hold; \eqn{n_{ir.} =
+\sum_{c=1}^C n_{irc}} for \eqn{r=1,\dots,R}, and \eqn{n_{i.c}=\sum_{r=1}^R
+n_{irc}} for \eqn{c=1,\dots,C}. Then, each of the unknown inner cells can be
+bounded in the following manner, \deqn{\max(0, n_{ir.}+n_{i.c}-N_i) \le
+n_{irc} \le \min(n_{ir.}, n_{i.c}).} If the size of tables, \code{N}, is
+provided,
+An \eqn{R \times C} ecological table in the form of proportions:
+\tabular{lcccc}{ \eqn{W_{i11}} \tab \eqn{W_{i12}} \tab \dots{} \tab
+\eqn{W_{i1C}} \tab \eqn{Y_{i1}} \cr \eqn{W_{i21}} \tab \eqn{W_{i22}} \tab
+\dots{} \tab \eqn{W_{i2C}} \tab \eqn{Y_{i2}} \cr \dots{} \tab \dots{} \tab
+\dots{} \tab \dots{} \tab \dots{} \cr \eqn{W_{iR1}} \tab \eqn{W_{iR2}} \tab
+\dots{} \tab \eqn{W_{iRC}} \tab \eqn{Y_{iR}} \cr \eqn{X_{i1}} \tab
+\eqn{X_{i2}} \tab \dots{} \tab \eqn{X_{iC}} \tab } where \eqn{Y_{ir}} and
+\eqn{X_{ic}} represent the observed margins, and \eqn{W_{irc}} are unknown
+variables. Note that for each \eqn{i}, the following deterministic
+relationships hold; \eqn{Y_{ir} = \sum_{c=1}^C X_{ic} W_{irc}} for
+\eqn{r=1,\dots,R}, and \eqn{\sum_{r=1}^R W_{irc}=1} for \eqn{c=1,\dots,C}.
+Then, each of the inner cells of the table can be bounded in the following
+manner, \deqn{\max(0, (X_{ic} + Y_{ir}-1)/X_{ic}) \le W_{irc} \le \min(1,
+Y_{ir}/X_{ic}).}
+}
\examples{
+
## load the registration data
data(reg)
@@ -90,49 +98,26 @@ data(reg)
res <- ecoBD(Y ~ X, N = N, data = reg)
## print the results
print(res)
-}
-\value{
- An object of class \code{ecoBD} containing the following elements
- (When three dimensional arrays are used, the first dimension indexes
- the observations, the second dimension indexes the row numbers, and
- the third dimension indexes the column numbers):
- \item{call}{The matched call.}
- \item{X}{A matrix of the observed row margin, \eqn{X}.}
- \item{Y}{A matrix of the observed column margin, \eqn{Y}.}
- \item{N}{A vector of the size of ecological tables, \eqn{N}.}
- \item{aggWmin}{A three dimensional array of aggregate lower bounds for
- proportions.}
- \item{aggWmax}{A three dimensional array of aggregate upper bounds for
- proportions.}
- \item{Wmin}{A three dimensional array of lower bounds for
- proportions.}
- \item{Wmax}{A three dimensional array of upper bounds for
- proportions.}
- \item{Nmin}{A three dimensional array of lower bounds for
- counts.}
- \item{Nmax}{A three dimensional array of upper bounds for
- counts.}
- The object can be printed through \code{print.ecoBD}.
}
+\references{
+Imai, Kosuke, Ying Lu and Aaron Strauss. (2011) \dQuote{eco: R
+Package for Ecological Inference in 2x2 Tables} Journal of Statistical
+Software, Vol. 42, No. 5, pp. 1-23. available at
+\url{http://imai.princeton.edu/software/eco.html}
-\references{ Imai, Kosuke, Ying Lu and Aaron Strauss. (2011)
- \dQuote{eco: R Package for Ecological Inference in 2x2 Tables}
- Journal of Statistical Software, Vol. 42, No. 5, pp. 1-23. available
- at \url{http://imai.princeton.edu/software/eco.html}
-
- Imai, Kosuke, Ying Lu and Aaron Strauss. (2008)
- \dQuote{Bayesian and Likelihood Inference for 2 x 2 Ecological
- Tables: An Incomplete Data Approach} Political Analysis, Vol. 16,
- No. 1, (Winter), pp. 41-69. available at
- \url{http://imai.princeton.edu/research/eiall.html} }
-
+Imai, Kosuke, Ying Lu and Aaron Strauss. (2008) \dQuote{Bayesian and
+Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
+Approach} Political Analysis, Vol. 16, No. 1, (Winter), pp. 41-69.
+available at \url{http://imai.princeton.edu/research/eiall.html}
+}
+\seealso{
+\code{eco}, \code{ecoNP}
+}
\author{
- Kosuke Imai, Department of Politics, Princeton University
- \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu/};
- Ying Lu, Center for Promoting Research Involving Innovative Statistical Methodology (PRIISM), New York University
- \email{ying.lu at nyu.Edu}
+Kosuke Imai, Department of Politics, Princeton University
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu/}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}
}
-
-\seealso{\code{eco}, \code{ecoNP}}
\keyword{models}
diff --git a/man/ecoML.Rd b/man/ecoML.Rd
index e2f1561..47f1cc3 100644
--- a/man/ecoML.Rd
+++ b/man/ecoML.Rd
@@ -1,124 +1,184 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/emeco.R
\name{ecoML}
-
\alias{ecoML}
-
-\title{Fitting Parametric Models and Quantifying Missing Information
- for Ecological Inference in 2x2 Tables}
-
-\description{
- \code{ecoML} is used to fit parametric models for ecological
- inference in \eqn{2 \times 2} tables via Expectation Maximization (EM)
- algorithms. The data is specified in proportions. At it's most basic setting, the algorithm
- assumes that the individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2}) and distributed bivariate normally (after logit
- transformations). The function calculates point estimates of the parameters for models
- based on different assumptions. The standard errors of the point
- estimates are also computed via Supplemented EM algorithms. Moreover,
- \code{ecoML} quantifies the amount of missing information associated
- with each parameter and allows researcher to examine the impact of
- missing information on parameter estimation in ecological
- inference. The models and algorithms are described in Imai,
- Lu and Strauss (2008, 2011).
-}
-
+\title{Fitting Parametric Models and Quantifying Missing Information for Ecological
+Inference in 2x2 Tables}
\usage{
- ecoML(formula, data = parent.frame(), N = NULL, supplement = NULL,
- theta.start = c(0,0,1,1,0), fix.rho = FALSE,
- context = FALSE, sem = TRUE, epsilon = 10^(-6),
- maxit = 1000, loglik = TRUE, hyptest = FALSE, verbose = FALSE)
+ecoML(formula, data = parent.frame(), N = NULL, supplement = NULL,
+ theta.start = c(0, 0, 1, 1, 0), fix.rho = FALSE, context = FALSE,
+ sem = TRUE, epsilon = 10^(-6), maxit = 1000, loglik = TRUE,
+ hyptest = FALSE, verbose = FALSE)
}
-
\arguments{
- \item{formula}{A symbolic description of the model to be fit,
- specifying the column and row margins of \eqn{2 \times
- 2} ecological tables. \code{Y ~ X} specifies \code{Y} as the
- column margin (e.g., turnout) and \code{X} (e.g., percent
- African-American) as the row margin. Details and specific
- examples are given below.
- }
- \item{data}{An optional data frame in which to interpret the variables
- in \code{formula}. The default is the environment in which
- \code{ecoML} is called.
- }
- \item{N}{An optional variable representing the size of the unit; e.g.,
- the total number of voters. \code{N} needs to be a vector of same length
- as \code{Y} and \code{X} or a scalar.
- }
- \item{supplement}{An optional matrix of supplemental data. The matrix
- has two columns, which contain additional individual-level data such
- as survey data for \eqn{W_1} and \eqn{W_2}, respectively. If
- \code{NULL}, no additional individual-level data are included in the
- model. The default is \code{NULL}.
- }
- \item{fix.rho}{Logical. If \code{TRUE}, the correlation
- (when \code{context=TRUE}) or the partial correlation (when
- \code{context=FALSE}) between \eqn{W_1} and \eqn{W_2}
- is fixed through the estimation. For details, see
- Imai, Lu and Strauss(2006). The default is \code{FALSE}.
- }
- \item{context}{Logical. If \code{TRUE}, the contextual effect is also
- modeled. In this case, the row margin (i.e., X) and the individual-level rates
- (i.e., \eqn{W_1} and \eqn{W_2}) are assumed to be distributed tri-variate normally
- (after logit transformations). See Imai, Lu and Strauss (2006) for
- details. The default is \code{FALSE}.
- }
- \item{sem}{Logical. If \code{TRUE}, the standard errors of parameter
- estimates are estimated via SEM algorithm, as well as the fraction of missing data. The default is
- \code{TRUE}.
- }
- \item{theta.start}{A numeric vector that specifies the starting values
- for the mean, variance, and covariance. When \code{context = FALSE},
- the elements of \code{theta.start} correspond to (\eqn{E(W_1)},
- \eqn{E(W_2)}, \eqn{var(W_1)}, \eqn{var(W_2)},
- \eqn{cor(W_1,W_2)}). When \code{context = TRUE}, the
- elements of \code{theta.start} correspond to (\eqn{E(W_1)},
- \eqn{E(W_2)}, \eqn{var(W_1)}, \eqn{var(W_2)}, \eqn{corr(W_1, X)},
- \eqn{corr(W_2, X)}, \eqn{corr(W_1,W_2)}). Moreover, when
- \code{fix.rho=TRUE}, \eqn{corr(W_1,W_2)} is set to be the
- correlation between \eqn{W_1} and \eqn{W_2} when \code{context =
- FALSE}, and the partial correlation between \eqn{W_1} and
- \eqn{W_2} given \eqn{X} when \code{context = FALSE}. The default is
- \code{c(0,0,1,1,0)}.
- }
- \item{epsilon}{A positive number that specifies the convergence criterion
- for EM algorithm. The square root of \code{epsilon} is the convergence
- criterion for SEM algorithm. The default is \code{10^(-6)}.
- }
- \item{maxit}{A positive integer specifies the maximum number of iterations
- before the convergence criterion is met. The default is \code{1000}.
- }
- \item{loglik}{Logical. If \code{TRUE}, the value of the log-likelihood
- function at each iteration of EM is saved. The default is
- \code{TRUE}.
- }
- \item{hyptest}{Logical. If \code{TRUE}, model is estimated under the null
- hypothesis that means of \eqn{W1} and \eqn{W2} are the same.
- The default is \code{FALSE}.
- }
- \item{verbose}{Logical. If \code{TRUE}, the progress of the EM and SEM
- algorithms is printed to the screen. The default is \code{FALSE}.
- }
+\item{formula}{A symbolic description of the model to be fit, specifying the
+column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~ X}
+specifies \code{Y} as the column margin (e.g., turnout) and \code{X} (e.g.,
+percent African-American) as the row margin. Details and specific examples
+are given below.}
+
+\item{data}{An optional data frame in which to interpret the variables in
+\code{formula}. The default is the environment in which \code{ecoML} is
+called.}
+
+\item{N}{An optional variable representing the size of the unit; e.g., the
+total number of voters. \code{N} needs to be a vector of same length as
+\code{Y} and \code{X} or a scalar.}
+
+\item{supplement}{An optional matrix of supplemental data. The matrix has
+two columns, which contain additional individual-level data such as survey
+data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no
+additional individual-level data are included in the model. The default is
+\code{NULL}.}
+
+\item{theta.start}{A numeric vector that specifies the starting values for
+the mean, variance, and covariance. When \code{context = FALSE}, the
+elements of \code{theta.start} correspond to (\eqn{E(W_1)}, \eqn{E(W_2)},
+\eqn{var(W_1)}, \eqn{var(W_2)}, \eqn{cor(W_1,W_2)}). When \code{context =
+TRUE}, the elements of \code{theta.start} correspond to (\eqn{E(W_1)},
+\eqn{E(W_2)}, \eqn{var(W_1)}, \eqn{var(W_2)}, \eqn{corr(W_1, X)},
+\eqn{corr(W_2, X)}, \eqn{corr(W_1,W_2)}). Moreover, when
+\code{fix.rho=TRUE}, \eqn{corr(W_1,W_2)} is set to be the correlation
+between \eqn{W_1} and \eqn{W_2} when \code{context = FALSE}, and the partial
+correlation between \eqn{W_1} and \eqn{W_2} given \eqn{X} when \code{context
+= TRUE}. The default is \code{c(0,0,1,1,0)}.}
+
+\item{fix.rho}{Logical. If \code{TRUE}, the correlation (when
+\code{context=TRUE}) or the partial correlation (when \code{context=FALSE})
+between \eqn{W_1} and \eqn{W_2} is fixed through the estimation. For
+details, see Imai, Lu and Strauss (2006). The default is \code{FALSE}.}
+
+\item{context}{Logical. If \code{TRUE}, the contextual effect is also
+modeled. In this case, the row margin (i.e., X) and the individual-level
+rates (i.e., \eqn{W_1} and \eqn{W_2}) are assumed to be distributed
+tri-variate normally (after logit transformations). See Imai, Lu and Strauss
+(2006) for details. The default is \code{FALSE}.}
+
+\item{sem}{Logical. If \code{TRUE}, the standard errors of parameter
+estimates are estimated via SEM algorithm, as well as the fraction of
+missing data. The default is \code{TRUE}.}
+
+\item{epsilon}{A positive number that specifies the convergence criterion
+for EM algorithm. The square root of \code{epsilon} is the convergence
+criterion for SEM algorithm. The default is \code{10^(-6)}.}
+
+\item{maxit}{A positive integer that specifies the maximum number of iterations
+before the convergence criterion is met. The default is \code{1000}.}
+
+\item{loglik}{Logical. If \code{TRUE}, the value of the log-likelihood
+function at each iteration of EM is saved. The default is \code{TRUE}.}
+
+\item{hyptest}{Logical. If \code{TRUE}, model is estimated under the null
+hypothesis that the means of \eqn{W_1} and \eqn{W_2} are the same. The default is
+\code{FALSE}.}
+
+\item{verbose}{Logical. If \code{TRUE}, the progress of the EM and SEM
+algorithms is printed to the screen. The default is \code{FALSE}.}
+}
+\value{
+An object of class \code{ecoML} containing the following elements:
+\item{call}{The matched call.}
+\item{X}{The row margin, \eqn{X}.}
+\item{Y}{The column margin, \eqn{Y}.}
+\item{N}{The size of each table, \eqn{N}.}
+\item{context}{The assumption under which model is estimated. If
+\code{context = FALSE}, CAR assumption is adopted and no contextual effect
+is modeled. If \code{context = TRUE}, NCAR assumption is adopted, and
+contextual effect is modeled.} \item{sem}{Whether SEM algorithm is used to
+estimate the standard errors and observed information matrix for the
+parameter estimates.}
+\item{fix.rho}{Whether the correlation or the partial
+correlation between \eqn{W_1} and \eqn{W_2} is fixed in the estimation.}
+\item{r12}{If \code{fix.rho = TRUE}, the value that \eqn{corr(W_1, W_2)} is
+fixed to.}
+\item{epsilon}{The precision criterion for EM convergence.
+\eqn{\sqrt{\epsilon}} is the precision criterion for SEM convergence.}
+\item{theta.sem}{The ML estimates of \eqn{E(W_1)},\eqn{E(W_2)},
+\eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. If \code{context =
+TRUE}, \eqn{E(X)},\eqn{cov(W_1,X)}, \eqn{cov(W_2,X)} are also reported.}
+\item{W}{In-sample estimation of \eqn{W_1} and \eqn{W_2}.}
+\item{suff.stat}{The sufficient statistics for \code{theta.em}.}
+\item{iters.em}{Number of EM iterations before convergence is achieved.}
+\item{iters.sem}{Number of SEM iterations before convergence is achieved.}
+\item{loglik}{The log-likelihood of the model when convergence is achieved.}
+\item{loglik.log.em}{A vector saving the value of the log-likelihood
+function at each iteration of the EM algorithm.}
+\item{mu.log.em}{A matrix saving the unweighted mean estimation of the
+logit-transformed individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2})
+at each iteration of the EM process.} \item{Sigma.log.em}{A matrix saving the
+log of the variance estimation of the logit-transformed individual-level
+proportions (i.e., \eqn{W_1} and \eqn{W_2}) at each iteration of EM process.
+Note, non-transformed variances are displayed on the screen (when
+\code{verbose = TRUE}).}
+\item{rho.fisher.em}{A matrix saving the fisher
+transformation of the estimation of the correlations between the
+logit-transformed individual-level proportions (i.e., \eqn{W_1} and
+\eqn{W_2}) at each iteration of EM process. Note, non-transformed
+correlations are displayed on the screen (when \code{verbose = TRUE}).}
+Moreover, when \code{sem=TRUE}, \code{ecoML} also outputs the following
+values:
+\item{DM}{The matrix characterizing the rates of convergence of the
+EM algorithms. Such information is also used to calculate the observed-data
+information matrix.}
+\item{Icom}{The (expected) complete data information
+matrix estimated via SEM algorithm. When \code{context=FALSE, fix.rho=TRUE},
+\code{Icom} is 4 by 4. When \code{context=FALSE, fix.rho=FALSE}, \code{Icom}
+is 5 by 5. When \code{context=TRUE}, \code{Icom} is 9 by 9.}
+\item{Iobs}{The observed information matrix. The dimension of \code{Iobs}
+is same as \code{Icom}.}
+\item{Imiss}{The difference between \code{Icom} and \code{Iobs}.
+The dimension of \code{Imiss} is the same as \code{Icom}.}
+\item{Vobs}{The (symmetrized) variance-covariance matrix of the ML parameter
+estimates. The dimension of \code{Vobs} is same as \code{Icom}.}
+\item{Iobs}{The (expected) complete-data variance-covariance matrix. The
+dimension of \code{Iobs} is same as \code{Icom}.}
+\item{Vobs.original}{The estimated variance-covariance matrix of the ML parameter
+estimates. The dimension of \code{Vobs} is same as \code{Icom}.}
+\item{Fmis}{The fraction of missing information associated with each parameter estimation. }
+\item{VFmis}{The proportion of increased variance associated with each
+parameter estimation due to observed data. }
+\item{Ieigen}{The largest eigenvalue of \code{Imiss}.}
+\item{Icom.trans}{The complete data information
+matrix for the fisher transformed parameters.}
+\item{Iobs.trans}{The observed data information matrix for the fisher transformed parameters.}
+\item{Fmis.trans}{The fractions of missing information associated with the
+fisher transformed parameters.}
+}
+\description{
+\code{ecoML} is used to fit parametric models for ecological inference in
+\eqn{2 \times 2} tables via Expectation Maximization (EM) algorithms. The
+data is specified in proportions. In its most basic setting, the algorithm
+assumes that the individual-level proportions (i.e., \eqn{W_1} and
+\eqn{W_2}) are distributed bivariate normally (after logit transformations).
+The function calculates point estimates of the parameters for models based
+on different assumptions. The standard errors of the point estimates are
+also computed via Supplemented EM algorithms. Moreover, \code{ecoML}
+quantifies the amount of missing information associated with each parameter
+and allows researcher to examine the impact of missing information on
+parameter estimation in ecological inference. The models and algorithms are
+described in Imai, Lu and Strauss (2008, 2011).
}
-
\details{
- When \code{SEM} is \code{TRUE}, \code{ecoML} computes the observed-data
- information matrix for the parameters of interest based on Supplemented-EM
- algorithm. The inverse of the observed-data information matrix can be used
- to estimate the variance-covariance matrix for the parameters estimated
- from EM algorithms. In addition, it also computes the expected complete-data
- information matrix. Based on these two measures, one can further calculate
- the fraction of missing information associated with each parameter. See
- Imai, Lu and Strauss (2006) for more details about fraction of missing
- information.
-
- Moreover, when \code{hytest=TRUE}, \code{ecoML} allows to estimate the
- parametric model under the null hypothesis that \code{mu_1=mu_2}. One
- can then construct the likelihood ratio test to assess the hypothesis of
- equal means. The associated fraction of missing information for the test
- statistic can be also calculated. For details, see Imai, Lu
- and Strauss (2006) for details.
+When \code{sem} is \code{TRUE}, \code{ecoML} computes the observed-data
+information matrix for the parameters of interest based on Supplemented-EM
+algorithm. The inverse of the observed-data information matrix can be used
+to estimate the variance-covariance matrix for the parameters estimated from
+EM algorithms. In addition, it also computes the expected complete-data
+information matrix. Based on these two measures, one can further calculate
+the fraction of missing information associated with each parameter. See
+Imai, Lu and Strauss (2006) for more details about fraction of missing
+information.
+
+Moreover, when \code{hyptest=TRUE}, \code{ecoML} allows one to estimate the
+parametric model under the null hypothesis that \code{mu_1=mu_2}. One can
+then construct the likelihood ratio test to assess the hypothesis of equal
+means. The associated fraction of missing information for the test statistic
+can also be calculated. See Imai, Lu and Strauss (2006) for
+details.
}
\examples{
+
## load the census data
data(census)
@@ -153,100 +213,28 @@ surv <- 1:600
## summarize the results
\dontrun{summary(res1)}
-}
-\value{
- An object of class \code{ecoML} containing the following elements:
- \item{call}{The matched call.}
- \item{X}{The row margin, \eqn{X}.}
- \item{Y}{The column margin, \eqn{Y}.}
- \item{N}{The size of each table, \eqn{N}.}
- \item{context}{The assumption under which model is estimated. If
- \code{context = FALSE}, CAR assumption is adopted and no
- contextual effect is modeled. If \code{context = TRUE}, NCAR
- assumption is adopted, and contextual effect is modeled.}
- \item{sem}{Whether SEM algorithm is used to estimate the standard
- errors and observed information matrix for the parameter estimates.}
- \item{fix.rho}{Whether the correlation or the partial correlation between
- \eqn{W_1} an \eqn{W_2} is fixed in the estimation.}
- \item{r12}{If \code{fix.rho = TRUE}, the value that \eqn{corr(W_1,
- W_2)} is fixed to.}
- \item{epsilon}{The precision criterion for EM convergence.
- \eqn{\sqrt{\epsilon}} is the precision criterion for SEM convergence.}
- \item{theta.sem}{The ML estimates of \eqn{E(W_1)},\eqn{E(W_2)},
- \eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. If
- \code{context = TRUE}, \eqn{E(X)},\eqn{cov(W_1,X)},
- \eqn{cov(W_2,X)} are also reported.}
- \item{W}{In-sample estimation of \eqn{W_1} and \eqn{W_2}.}
- \item{suff.stat}{The sufficient statistics for \code{theta.em}.}
- \item{iters.em}{Number of EM iterations before convergence is achieved.}
- \item{iters.sem}{Number of SEM iterations before convergence is achieved.}
- \item{loglik}{The log-likelihood of the model when convergence is
- achieved.}
- \item{loglik.log.em}{A vector saving the value of the log-likelihood
- function at each iteration of the EM algorithm.}
- \item{mu.log.em}{A matrix saving the unweighted mean estimation of the logit-transformed
- individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2}) at each iteration of the EM process.}
- \item{Sigma.log.em}{A matrix saving the log of the variance estimation of the logit-transformed
- individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2}) at each iteration of EM process.
- Note, non-transformed variances are displayed on the screen (when \code{verbose = TRUE}).}
- \item{rho.fisher.em}{A matrix saving the fisher transformation of the estimation of the correlations between
- the logit-transformed individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2}) at each iteration of EM process.
- Note, non-transformed correlations are displayed on the screen (when \code{verbose = TRUE}).}
- Moreover, when \code{sem=TRUE}, \code{ecoML} also output the following
- values:
- \item{DM}{The matrix characterizing the rates of convergence of the EM
- algorithms. Such information is also used to calculate the observed-data
- information matrix}
- \item{Icom}{The (expected) complete data information matrix estimated
- via SEM algorithm. When \code{context=FALSE, fix.rho=TRUE},
- \code{Icom} is 4 by 4. When \code{context=FALSE, fix.rho=FALSE},
- \code{Icom} is 5 by 5. When \code{context=TRUE}, \code{Icom}
- is 9 by 9.}
- \item{Iobs}{The observed information matrix. The dimension of
- \code{Iobs} is same as \code{Icom}.}
- \item{Imiss}{The difference between \code{Icom} and \code{Iobs}.
- The dimension of \code{Imiss} is same as \code{miss}.}
- \item{Vobs}{The (symmetrized) variance-covariance matrix of the ML parameter
- estimates. The dimension of \code{Vobs} is same as
- \code{Icom}.}
- \item{Iobs}{The (expected) complete-data variance-covariance matrix.
- The dimension of \code{Iobs} is same as \code{Icom}.}
- \item{Vobs.original}{The estimated variance-covariance matrix of the
- ML parameter estimates. The dimension of \code{Vobs} is same as
- \code{Icom}.}
- \item{Fmis}{The fraction of missing information associated with each
- parameter estimation. }
- \item{VFmis}{The proportion of increased variance associated with each
- parameter estimation due to observed data. }
- \item{Ieigen}{The largest eigen value of \code{Imiss}.}
- \item{Icom.trans}{The complete data information matrix for the fisher
- transformed parameters.}
- \item{Iobs.trans}{The observed data information matrix for the fisher
- transformed parameters.}
- \item{Fmis.trans}{The fractions of missing information associated with
- the fisher transformed parameters.}
}
-
+\references{
+Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R
+Package for Ecological Inference in 2x2 Tables} Journal of Statistical
+Software, Vol. 42, No. 5, pp. 1-23. available at
+\url{http://imai.princeton.edu/software/eco.html}
+
+Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
+Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
+Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available
+at \url{http://imai.princeton.edu/research/eiall.html}
+}
+\seealso{
+\code{eco}, \code{ecoNP}, \code{summary.ecoML}
+}
\author{
- Kosuke Imai, Department of Politics, Princeton University,
- \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu};
- Ying Lu, Center for Promoting Research Involving Innovative Statistical Methodology (PRIISM), New York University,
- \email{ying.lu at nyu.Edu};
- Aaron Strauss, Department of Politics, Princeton University,
- \email{abstraus at Princeton.Edu}.
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University, \email{ying.lu at nyu.Edu}; Aaron Strauss,
+Department of Politics, Princeton University,
+\email{abstraus at Princeton.Edu}.
}
-
-\references{ Imai, Kosuke, Ying Lu and Aaron Strauss. (2011).
- \dQuote{eco: R Package for Ecological Inference in 2x2 Tables}
- Journal of Statistical Software, Vol. 42, No. 5, pp. 1-23. available
- at \url{http://imai.princeton.edu/software/eco.html}
-
- Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian
- and Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete
- Data Approach} Political Analysis, Vol. 16, No. 1 (Winter),
- pp. 41-69. available at
- \url{http://imai.princeton.edu/research/eiall.html} }
-
-\seealso{\code{eco}, \code{ecoNP}, \code{summary.ecoML}}
\keyword{models}
diff --git a/man/ecoNP.Rd b/man/ecoNP.Rd
index 6390e9b..eab02fe 100644
--- a/man/ecoNP.Rd
+++ b/man/ecoNP.Rd
@@ -1,121 +1,143 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ecoNP.R
\name{ecoNP}
-
\alias{ecoNP}
-
-\title{Fitting the Nonparametric Bayesian Models of Ecological Inference
- in 2x2 Tables}
-
-\description{ \code{ecoNP} is used to fit the nonparametric Bayesian
- model (based on a Dirichlet process prior) for ecological inference
- in \eqn{2 \times 2} tables via Markov chain Monte Carlo. It gives
- the in-sample predictions as well as out-of-sample predictions for
- population inference. The models and algorithms are described in
- Imai, Lu and Strauss (2008, 2011). }
-
+\title{Fitting the Nonparametric Bayesian Models of Ecological Inference in 2x2
+Tables}
\usage{
ecoNP(formula, data = parent.frame(), N = NULL, supplement = NULL,
- context = FALSE, mu0 = 0, tau0 = 2, nu0 = 4, S0 = 10,
- alpha = NULL, a0 = 1, b0 = 0.1, parameter = FALSE,
- grid = FALSE, n.draws = 5000, burnin = 0, thin = 0,
- verbose = FALSE)
+ context = FALSE, mu0 = 0, tau0 = 2, nu0 = 4, S0 = 10,
+ alpha = NULL, a0 = 1, b0 = 0.1, parameter = FALSE, grid = FALSE,
+ n.draws = 5000, burnin = 0, thin = 0, verbose = FALSE)
}
-
\arguments{
- \item{formula}{A symbolic description of the model to be fit,
- specifying the column and row margins of \eqn{2 \times
- 2} ecological tables. \code{Y ~ X} specifies \code{Y} as the
- column margin (e.g., turnout) and \code{X} as the row margin
- (e.g., percent African-American). Details and specific examples
- are given below.
- }
- \item{data}{An optional data frame in which to interpret the variables
- in \code{formula}. The default is the environment in which
- \code{ecoNP} is called.
- }
- \item{N}{An optional variable representing the size of the unit; e.g.,
- the total number of voters. \code{N} needs to be a vector of same length
- as \code{Y} and \code{X} or a scalar.}
- \item{supplement}{An optional matrix of supplemental data. The matrix
- has two columns, which contain additional individual-level data such
- as survey data for \eqn{W_1} and \eqn{W_2}, respectively. If
- \code{NULL}, no additional individual-level data are included in the
- model. The default is \code{NULL}.
- }
- \item{context}{Logical. If \code{TRUE}, the contextual effect is also
- modeled, that is to assume the row margin \eqn{X} and the unknown
- \eqn{W_1} and \eqn{W_2} are correlated. See Imai, Lu and Strauss
- (2008, 2011) for details. The default is \code{FALSE}.
- }
- \item{mu0}{A scalar or a numeric vector that specifies the prior mean
- for the mean parameter \eqn{\mu} of the base prior distribution \eqn{G_0}
- (see Imai, Lu and Strauss (2008, 2011) for detailed
- descriptions of Dirichlete prior and the normal base prior distribution) .
- If it is a scalar, then its value will be repeated to yield a vector
- of the length of \eqn{\mu}, otherwise,
- it needs to be a vector of same length as \eqn{\mu}.
- When \code{context=TRUE }, the length of \eqn{\mu} is 3,
- otherwise it is 2. The default is \code{0}.
- }
- \item{tau0}{A positive integer representing the scale parameter of the
- Normal-Inverse Wishart prior for the mean and variance parameter
- \eqn{(\mu_i, \Sigma_i)} of each observation. The default is \code{2}.}
-
- \item{nu0}{A positive integer representing the prior degrees of
- freedom of the variance matrix \eqn{\Sigma_i}. the default is \code{4}.
- }
- \item{S0}{A positive scalar or a positive definite matrix that specifies
- the prior scale matrix for the variance matrix \eqn{\Sigma_i}. If it is
- a scalar, then the prior scale matrix will be a diagonal matrix with
- the same dimensions as \eqn{\Sigma_i} and the diagonal elements all
- take value of \code{S0}, otherwise \code{S0} needs to have same
- dimensions as \eqn{\Sigma_i}. When \code{context=TRUE}, \eqn{\Sigma} is a
- \eqn{3 \times 3} matrix, otherwise, it is \eqn{2 \times 2}.
- The default is \code{10}.
- }
- \item{alpha}{A positive scalar representing a user-specified fixed
- value of the concentration parameter, \eqn{\alpha}. If \code{NULL},
- \eqn{\alpha} will be updated at each Gibbs draw, and its prior
- parameters \code{a0} and \code{b0} need to be specified. The default
- is \code{NULL}.
- }
- \item{a0}{A positive integer representing the value of shape parameter
- of the gamma prior distribution for \eqn{\alpha}. The default is \code{1}.
- }
- \item{b0}{A positive integer representing the value of the scale
- parameter of the gamma prior distribution for \eqn{\alpha}. The
- default is \code{0.1}.
- }
- \item{parameter}{Logical. If \code{TRUE}, the Gibbs draws of the population
- parameters, \eqn{\mu} and \eqn{\Sigma}, are returned in addition to
- the in-sample predictions of the missing internal cells,
- \eqn{W}. The default is \code{FALSE}. This needs to be set to
- \code{TRUE} if one wishes to make population inferences through
- \code{predict.eco}. See an example below.
- }
- \item{grid}{Logical. If \code{TRUE}, the grid method is used to sample
- \eqn{W} in the Gibbs sampler. If \code{FALSE}, the Metropolis
- algorithm is used where candidate draws are sampled from the uniform
- distribution on the tomography line for each unit. Note that the
- grid method is significantly slower than the Metropolis algorithm.
- }
- \item{n.draws}{A positive integer. The number of MCMC draws.
- The default is \code{5000}.
- }
- \item{burnin}{A positive integer. The burnin interval for the Markov
- chain; i.e. the number of initial draws that should not be stored. The
- default is \code{0}.
- }
- \item{thin}{A positive integer. The thinning interval for the
- Markov chain; i.e. the number of Gibbs draws between the recorded
- values that are skipped. The default is \code{0}.
- }
- \item{verbose}{Logical. If \code{TRUE}, the progress of the Gibbs
- sampler is printed to the screen. The default is \code{FALSE}.
- }
+\item{formula}{A symbolic description of the model to be fit, specifying the
+column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~ X}
+specifies \code{Y} as the column margin (e.g., turnout) and \code{X} as the
+row margin (e.g., percent African-American). Details and specific examples
+are given below.}
+
+\item{data}{An optional data frame in which to interpret the variables in
+\code{formula}. The default is the environment in which \code{ecoNP} is
+called.}
+
+\item{N}{An optional variable representing the size of the unit; e.g., the
+total number of voters. \code{N} needs to be a vector of same length as
+\code{Y} and \code{X} or a scalar.}
+
+\item{supplement}{An optional matrix of supplemental data. The matrix has
+two columns, which contain additional individual-level data such as survey
+data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no
+additional individual-level data are included in the model. The default is
+\code{NULL}.}
+
+\item{context}{Logical. If \code{TRUE}, the contextual effect is also
+modeled, that is to assume the row margin \eqn{X} and the unknown \eqn{W_1}
+and \eqn{W_2} are correlated. See Imai, Lu and Strauss (2008, 2011) for
+details. The default is \code{FALSE}.}
+
+\item{mu0}{A scalar or a numeric vector that specifies the prior mean for
+the mean parameter \eqn{\mu} of the base prior distribution \eqn{G_0} (see
+Imai, Lu and Strauss (2008, 2011) for detailed descriptions of the Dirichlet
+prior and the normal base prior distribution). If it is a scalar, then its
+value will be repeated to yield a vector of the length of \eqn{\mu},
+otherwise, it needs to be a vector of same length as \eqn{\mu}. When
+\code{context=TRUE}, the length of \eqn{\mu} is 3, otherwise it is 2. The
+default is \code{0}.}
+
+\item{tau0}{A positive integer representing the scale parameter of the
+Normal-Inverse Wishart prior for the mean and variance parameter
+\eqn{(\mu_i, \Sigma_i)} of each observation. The default is \code{2}.}
+
+\item{nu0}{A positive integer representing the prior degrees of freedom of
+the variance matrix \eqn{\Sigma_i}. The default is \code{4}.}
+
+\item{S0}{A positive scalar or a positive definite matrix that specifies the
+prior scale matrix for the variance matrix \eqn{\Sigma_i}. If it is a
+scalar, then the prior scale matrix will be a diagonal matrix with the same
+dimensions as \eqn{\Sigma_i} and the diagonal elements all take value of
+\code{S0}, otherwise \code{S0} needs to have same dimensions as
+\eqn{\Sigma_i}. When \code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times 3}
+matrix, otherwise, it is \eqn{2 \times 2}. The default is \code{10}.}
+
+\item{alpha}{A positive scalar representing a user-specified fixed value of
+the concentration parameter, \eqn{\alpha}. If \code{NULL}, \eqn{\alpha} will
+be updated at each Gibbs draw, and its prior parameters \code{a0} and
+\code{b0} need to be specified. The default is \code{NULL}.}
+
+\item{a0}{A positive integer representing the value of shape parameter of
+the gamma prior distribution for \eqn{\alpha}. The default is \code{1}.}
+
+\item{b0}{A positive number representing the value of the scale parameter
+of the gamma prior distribution for \eqn{\alpha}. The default is \code{0.1}.}
+
+\item{parameter}{Logical. If \code{TRUE}, the Gibbs draws of the population
+parameters, \eqn{\mu} and \eqn{\Sigma}, are returned in addition to the
+in-sample predictions of the missing internal cells, \eqn{W}. The default is
+\code{FALSE}. This needs to be set to \code{TRUE} if one wishes to make
+population inferences through \code{predict.eco}. See an example below.}
+
+\item{grid}{Logical. If \code{TRUE}, the grid method is used to sample
+\eqn{W} in the Gibbs sampler. If \code{FALSE}, the Metropolis algorithm is
+used where candidate draws are sampled from the uniform distribution on the
+tomography line for each unit. Note that the grid method is significantly
+slower than the Metropolis algorithm.}
+
+\item{n.draws}{A positive integer. The number of MCMC draws. The default is
+\code{5000}.}
+
+\item{burnin}{A positive integer. The burnin interval for the Markov chain;
+i.e. the number of initial draws that should not be stored. The default is
+\code{0}.}
+
+\item{thin}{A positive integer. The thinning interval for the Markov chain;
+i.e. the number of Gibbs draws between the recorded values that are skipped.
+The default is \code{0}.}
+
+\item{verbose}{Logical. If \code{TRUE}, the progress of the Gibbs sampler is
+printed to the screen. The default is \code{FALSE}.}
+}
+\value{
+An object of class \code{ecoNP} containing the following elements:
+\item{call}{The matched call.}
+\item{X}{The row margin, \eqn{X}.}
+\item{Y}{The column margin, \eqn{Y}.}
+\item{burnin}{The number of initial burnin draws.}
+\item{thin}{The thinning interval.}
+\item{nu0}{The prior degrees of freedom.}
+\item{tau0}{The prior scale parameter.}
+\item{mu0}{The prior mean.}
+\item{S0}{The prior scale matrix.}
+\item{a0}{The prior shape parameter.}
+\item{b0}{The prior scale parameter.}
+\item{W}{A three dimensional array storing the posterior in-sample predictions
+of \eqn{W}. The first dimension indexes the Monte Carlo draws, the second dimension
+indexes the columns of the table, and the third dimension represents the observations.}
+\item{Wmin}{A numeric matrix storing the lower bounds of \eqn{W}.}
+\item{Wmax}{A numeric matrix storing the upper bounds of \eqn{W}.}
+The following additional elements are included in the output when
+\code{parameter = TRUE}.
+\item{mu}{A three dimensional array storing the
+posterior draws of the population mean parameter, \eqn{\mu}. The first
+dimension indexes the Monte Carlo draws, the second dimension indexes the
+columns of the table, and the third dimension represents the observations.}
+\item{Sigma}{A three dimensional array storing the posterior draws of the
+population variance matrix, \eqn{\Sigma}. The first dimension indexes the
+Monte Carlo draws, the second dimension indexes the parameters, and the
+third dimension represents the observations. }
+\item{alpha}{The posterior draws of \eqn{\alpha}.}
+\item{nstar}{The number of clusters at each Gibbs draw.}
+}
+\description{
+\code{ecoNP} is used to fit the nonparametric Bayesian model (based on a
+Dirichlet process prior) for ecological inference in \eqn{2 \times 2} tables
+via Markov chain Monte Carlo. It gives the in-sample predictions as well as
+out-of-sample predictions for population inference. The models and
+algorithms are described in Imai, Lu and Strauss (2008, 2011).
}
-
\examples{
+
## load the registration data
data(reg)
@@ -157,58 +179,26 @@ summary(res1)
## out-of sample prediction
pres1 <- predict(res1)
summary(pres1)}
-}
-\value{
- An object of class \code{ecoNP} containing the following elements:
- \item{call}{The matched call.}
- \item{X}{The row margin, \eqn{X}.}
- \item{Y}{The column margin, \eqn{Y}.}
- \item{burnin}{The number of initial burnin draws.}
- \item{thin}{The thinning interval.}
- \item{nu0}{The prior degrees of freedom.}
- \item{tau0}{The prior scale parameter.}
- \item{mu0}{The prior mean.}
- \item{S0}{The prior scale matrix.}
- \item{a0}{The prior shape parameter.}
- \item{b0}{The prior scale parameter.}
- \item{W}{A three dimensional array storing the posterior in-sample
- predictions of \eqn{W}. The first dimension indexes the Monte Carlo
- draws, the second dimension indexes the columns of the table, and the
- third dimension represents the observations.}
- \item{Wmin}{A numeric matrix storing the lower bounds of \eqn{W}.}
- \item{Wmax}{A numeric matrix storing the upper bounds of \eqn{W}.}
- The following additional elements are included in the output when
- \code{parameter = TRUE}.
- \item{mu}{A three dimensional array storing the posterior draws of the
- population mean parameter, \eqn{\mu}. The first dimension indexes the
- Monte Carlo draws, the second dimension indexes the columns of the
- table, and the third dimension represents the observations.}
- \item{Sigma}{A three dimensional array storing the posterior draws of
- the population variance matrix, \eqn{\Sigma}. The first dimension
- indexes the Monte Carlo draws, the second dimension indexes the
- parameters, and the third dimension represents the observations. }
- \item{alpha}{The posterior draws of \eqn{\alpha}.}
- \item{nstar}{The number of clusters at each Gibbs draw.}
}
-
-\author{
- Kosuke Imai, Department of Politics, Princeton University,
- \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu};
- Ying Lu, Center for Promoting Research Involving Innovative Statistical Methodology (PRIISM), New York University
- \email{ying.lu at nyu.Edu}
+\references{
+Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R
+Package for Ecological Inference in 2x2 Tables} Journal of Statistical
+Software, Vol. 42, No. 5, pp. 1-23. available at
+\url{http://imai.princeton.edu/software/eco.html}
+
+Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
+Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
+Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available
+at \url{http://imai.princeton.edu/research/eiall.html}
}
-
-\references{ Imai, Kosuke, Ying Lu and Aaron Strauss. (2011).
- \dQuote{eco: R Package for Ecological Inference in 2x2 Tables}
- Journal of Statistical Software, Vol. 42, No. 5, pp. 1-23. available
- at \url{http://imai.princeton.edu/software/eco.html}
-
- Imai, Kosuke, Ying Lu and Aaron Strauss. (2008).
- \dQuote{Bayesian and Likelihood Inference for 2 x 2 Ecological Tables:
- An Incomplete Data Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. available at
- \url{http://imai.princeton.edu/research/eiall.html}
+\seealso{
+\code{eco}, \code{ecoML}, \code{predict.eco}, \code{summary.ecoNP}
+}
+\author{
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}
}
-
-\seealso{\code{eco}, \code{ecoML}, \code{predict.eco}, \code{summary.ecoNP}}
\keyword{models}
diff --git a/man/forgnlit30.Rd b/man/forgnlit30.Rd
index 668cac2..9dfdc65 100644
--- a/man/forgnlit30.Rd
+++ b/man/forgnlit30.Rd
@@ -1,37 +1,26 @@
-\name{forgnlit30}
-
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/forgnlit30.R
\docType{data}
-
+\name{forgnlit30}
\alias{forgnlit30}
-
\title{Foreign-born literacy in 1930}
-
-\description{
- This data set contains, on a state level, the proportion of
- white residents ten years and older who are foreign born, and
- the proportion of those residents who are literate. Data come from
- the 1930 census and were first analyzed by Robinson (1950).
-}
-
-\usage{data(forgnlit30)}
-
\format{A data frame containing 5 variables and 48 observations
- \tabular{lll}{
- X \tab numeric \tab proportion of the white population at least 10
- years of age that is foreign born \cr
- Y \tab numeric \tab proportion of the white population at least 10
- years of age that is illiterate \cr
- W1 \tab numeric \tab proportion of the foreign-born white population
- at least 10 years of age that is illiterate \cr
- W2 \tab numeric \tab proportion of the native-born white population
- at least 10 years of age that is illiterate \cr
- ICPSR \tab numeric \tab the ICPSR state code
- }
+\tabular{lll}{ X \tab numeric \tab proportion of the white population at
+least 10 years of age that is foreign born \cr Y \tab numeric \tab
+proportion of the white population at least 10 years of age that is
+illiterate \cr W1 \tab numeric \tab proportion of the foreign-born white
+population at least 10 years of age that is illiterate \cr W2 \tab numeric
+\tab proportion of the native-born white population at least 10 years of age
+that is illiterate \cr ICPSR \tab numeric \tab the ICPSR state code }}
+\description{
+This data set contains, on a state level, the proportion of white residents
+ten years and older who are foreign born, and the proportion of those
+residents who are illiterate. Data come from the 1930 census and were first
+analyzed by Robinson (1950).
}
\references{
- Robinson, W.S. (1950). ``Ecological Correlations and the Behavior
- of Individuals.'' \emph{American Sociological Review}, vol. 15,
- pp.351-357.
+Robinson, W.S. (1950). ``Ecological Correlations and the
+Behavior of Individuals.'' \emph{American Sociological Review}, vol. 15,
+pp. 351-357.
}
-
\keyword{datasets}
diff --git a/man/forgnlit30c.Rd b/man/forgnlit30c.Rd
index 3478497..eed66f5 100644
--- a/man/forgnlit30c.Rd
+++ b/man/forgnlit30c.Rd
@@ -1,39 +1,28 @@
-\name{forgnlit30c}
-
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/forgnlit30c.R
\docType{data}
-
+\name{forgnlit30c}
\alias{forgnlit30c}
-
\title{Foreign-born literacy in 1930, County Level}
-
-\description{
- This data set contains, on a county level, the proportion of
- white residents ten years and older who are foreign born, and
- the proportion of those residents who are literate. Data come from
- the 1930 census and were first analyzed by Robinson (1950). Counties
- with fewer than 100 foreign born residents are dropped.
-}
-
-\usage{data(forgnlit30c)}
-
\format{A data frame containing 6 variables and 1976 observations
- \tabular{lll}{
- X \tab numeric \tab proportion of the white population at least 10
- years of age that is foreign born \cr
- Y \tab numeric \tab proportion of the white population at least 10
- years of age that is illiterate \cr
- W1 \tab numeric \tab proportion of the foreign-born white population
- at least 10 years of age that is illiterate \cr
- W2 \tab numeric \tab proportion of the native-born white population
- at least 10 years of age that is illiterate \cr
- state \tab numeric \tab the ICPSR state code \cr
- county \tab numeric \tab the ICPSR (within state) county code
- }
+\tabular{lll}{ X \tab numeric \tab proportion of the white population at
+least 10 years of age that is foreign born \cr Y \tab numeric \tab
+proportion of the white population at least 10 years of age that is
+illiterate \cr W1 \tab numeric \tab proportion of the foreign-born white
+population at least 10 years of age that is illiterate \cr W2 \tab numeric
+\tab proportion of the native-born white population at least 10 years of age
+that is illiterate \cr state \tab numeric \tab the ICPSR state code \cr
+county \tab numeric \tab the ICPSR (within state) county code }}
+\description{
+This data set contains, on a county level, the proportion of white residents
+ten years and older who are foreign born, and the proportion of those
+residents who are illiterate. Data come from the 1930 census and were first
+analyzed by Robinson (1950). Counties with fewer than 100 foreign born
+residents are dropped.
}
\references{
- Robinson, W.S. (1950). ``Ecological Correlations and the Behavior
- of Individuals.'' \emph{American Sociological Review}, vol. 15,
- pp.351-357.
+Robinson, W.S. (1950). ``Ecological Correlations and the
+Behavior of Individuals.'' \emph{American Sociological Review}, vol. 15,
+pp. 351-357.
}
-
\keyword{datasets}
diff --git a/man/housep88.Rd b/man/housep88.Rd
index 5f3d73f..252c307 100644
--- a/man/housep88.Rd
+++ b/man/housep88.Rd
@@ -1,44 +1,31 @@
-\name{housep88}
-
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/housep88.R
\docType{data}
-
+\name{housep88}
\alias{housep88}
-
-\title{Electoral Results for the House and Presidential Races in 1988}
-
+\title{Electoral Results for the House and Presidential Races in 1988}
+\format{A data frame containing 5 variables and 424 observations
+\tabular{lll}{ X \tab numeric \tab proportion voting for the Democrat in the
+presidential race \cr Y \tab numeric \tab proportion voting for the Democrat
+in the House race \cr N \tab numeric \tab number of major party voters in
+the presidential contest \cr HPCT \tab numeric \tab House election turnout
+divided by presidential election turnout (set to 1 if House turnout exceeds
+presidential turnout) \cr DIST \tab numeric \tab 4-digit ICPSR state and
+district code: first 2 digits for the state code, last two digits for the
+district number (e.g., 2106=IL 6th) }}
\description{
- This data set contains, on a House district level, the percentage of the
- vote for the Democratic House candidate, the percentage of the vote for
- the Democratic presidential candidate (Dukakis), the number of voters who
- voted for a major party candidate in the presidential race, and the ratio
- of voters in the House race versus the number who cast a ballot for
- President. Eleven (11) uncontested races are not included. Dataset
- compiled and analyzed by Burden and Kimball (1988). Complete dataset and
- documentation available at ICSPR study number 1140.
-}
-
-\usage{data(housep88)}
-
-\format{A data frame containing 5 variables and 424 observations
- \tabular{lll}{
- X \tab numeric \tab proportion voting for the Democrat in the
- presidential race \cr
- Y \tab numeric \tab proportion voting for the Democrat in the
- House race \cr
- N \tab numeric \tab number of major party voters in the presidential
- contest \cr
- HPCT \tab numeric \tab House election turnout divided by presidential
- election turnout (set to 1 if House turnout exceeds presidential
- turnout) \cr
- DIST \tab numeric \tab 4-digit ICPSR state and district code: first
- 2 digits for the state code, last two digits for the district
- number (e.g., 2106=IL 6th)
- }
+This data set contains, on a House district level, the percentage of the
+vote for the Democratic House candidate, the percentage of the vote for the
+Democratic presidential candidate (Dukakis), the number of voters who voted
+for a major party candidate in the presidential race, and the ratio of
+voters in the House race versus the number who cast a ballot for President.
+Eleven (11) uncontested races are not included. Dataset compiled and
+analyzed by Burden and Kimball (1988). Complete dataset and documentation
+available at ICPSR study number 1140.
}
\references{
- Burden, Barry C. and David C. Kimball (1988). ``A New Approach To Ticket-
- Splitting.'' The American Political Science Review. vol 92.,
- no. 3, pp. 553-544.
+Burden, Barry C. and David C. Kimball (1988). ``A New Approach
+To Ticket-Splitting.'' The American Political Science Review, vol. 92, no.
+3, pp. 533-544.
}
-
\keyword{datasets}
diff --git a/man/predict.eco.Rd b/man/predict.eco.Rd
index 0a51af2..32eaeb7 100644
--- a/man/predict.eco.Rd
+++ b/man/predict.eco.Rd
@@ -1,81 +1,64 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predict.eco.R
\name{predict.eco}
-
\alias{predict.eco}
-\alias{predict.ecoX}
-
-\title{Out-of-Sample Posterior Prediction under the Parametric Bayesian
-Model for Ecological Inference in 2x2 Tables}
-
-\description{
- Obtains out-of-sample posterior predictions under the fitted
- parametric Bayesian model for ecological
- inference. \code{predict} method for class \code{eco} and \code{ecoX}.
-}
-
+\title{Out-of-Sample Posterior Prediction under the Parametric Bayesian Model for
+Ecological Inference in 2x2 Tables}
\usage{
- \method{predict}{eco}(object, newdraw = NULL, subset = NULL,
- verbose = FALSE, ...)
- \method{predict}{ecoX}(object, newdraw = NULL, subset = NULL,
- newdata = NULL, cond = FALSE, verbose = FALSE, ...)
+\method{predict}{eco}(object, newdraw = NULL, subset = NULL,
+ verbose = FALSE, ...)
}
-
\arguments{
- \item{object}{An output object from \code{eco} or \code{ecoNP}.}
- \item{newdraw}{An optional list containing two matrices (or three
- dimensional arrays for the nonparametric model) of MCMC draws
- of \eqn{\mu} and \eqn{\Sigma}. Those elements should be named as
- \code{mu} and \code{Sigma}, respectively. The default is the
- original MCMC draws stored in \code{object}.
- }
- \item{newdata}{An optional data frame containing a new data set for
- which posterior predictions will be made. The new data set must have
- the same variable names as those in the original data.
- }
- \item{subset}{A scalar or numerical vector specifying the row
- number(s) of \code{mu} and \code{Sigma} in the output object from
- \code{eco}. If specified, the posterior draws of parameters for
- those rows are used for posterior prediction. The default is
- \code{NULL} where all the posterior draws are used.
- }
- \item{cond}{logical. If \code{TRUE}, then the conditional prediction
- will made for the parametric model with contextual effects. The
- default is \code{FALSE}.
- }
- \item{verbose}{logical. If \code{TRUE}, helpful messages along with a
- progress report on the Monte Carlo sampling from the posterior
- predictive distributions are printed on the screen. The default is
- \code{FALSE}.
- }
- \item{...}{further arguments passed to or from other methods.}
-}
+\item{object}{An output object from \code{eco} or \code{ecoNP}.}
-\details{The posterior predictive values are computed using the
- Monte Carlo sample stored in the \code{eco} output (or other sample if
- \code{newdraw} is specified). Given each Monte Carlo sample of the
- parameters, we sample the vector-valued latent variable from the
- appropriate multivariate Normal distribution. Then, we apply the
- inverse logit transformation to obtain the predictive values of
- proportions, \eqn{W}. The computation may be slow (especially for the
- nonparametric model) if a large Monte Carlo sample of the model
- parameters is used. In either case, setting \code{verbose = TRUE} may
- be helpful in monitoring the progress of the code.
-}
+\item{newdraw}{An optional list containing two matrices (or three
+dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
+and \eqn{\Sigma}. Those elements should be named as \code{mu} and
+\code{Sigma}, respectively. The default is the original MCMC draws stored in
+\code{object}.}
-\value{
- \code{predict.eco} yields a matrix of class \code{predict.eco}
- containing the Monte Carlo sample from the posterior predictive
- distribution of inner cells of ecological
- tables. \code{summary.predict.eco} will summarize the output, and
- \code{print.summary.predict.eco} will print the summary.
-}
+\item{subset}{A scalar or numerical vector specifying the row number(s) of
+\code{mu} and \code{Sigma} in the output object from \code{eco}. If
+specified, the posterior draws of parameters for those rows are used for
+posterior prediction. The default is \code{NULL} where all the posterior
+draws are used.}
-\seealso{\code{eco}, \code{predict.ecoNP}}
+\item{verbose}{logical. If \code{TRUE}, helpful messages along with a
+progress report on the Monte Carlo sampling from the posterior predictive
+distributions are printed on the screen. The default is \code{FALSE}.}
+\item{...}{further arguments passed to or from other methods.}
+}
+\value{
+\code{predict.eco} yields a matrix of class \code{predict.eco}
+containing the Monte Carlo sample from the posterior predictive distribution
+of inner cells of ecological tables. \code{summary.predict.eco} will
+summarize the output, and \code{print.summary.predict.eco} will print the
+summary.
+}
+\description{
+Obtains out-of-sample posterior predictions under the fitted parametric
+Bayesian model for ecological inference. \code{predict} method for class
+\code{eco} and \code{ecoX}.
+}
+\details{
+The posterior predictive values are computed using the Monte Carlo sample
+stored in the \code{eco} output (or other sample if \code{newdraw} is
+specified). Given each Monte Carlo sample of the parameters, we sample the
+vector-valued latent variable from the appropriate multivariate Normal
+distribution. Then, we apply the inverse logit transformation to obtain the
+predictive values of proportions, \eqn{W}. The computation may be slow
+(especially for the nonparametric model) if a large Monte Carlo sample of
+the model parameters is used. In either case, setting \code{verbose = TRUE}
+may be helpful in monitoring the progress of the code.
+}
+\seealso{
+\code{eco}, \code{predict.ecoNP}
+}
\author{
- Kosuke Imai, Department of Politics, Princeton University,
- \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu};
- Ying Lu, Center for Promoting Research Involving Innovative Statistical Methodology (PRIISM), New York University
- \email{ying.lu at nyu.Edu}
- }
-
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}
+}
\keyword{methods}
diff --git a/man/predict.ecoNP.Rd b/man/predict.ecoNP.Rd
index de3a31c..5119e69 100644
--- a/man/predict.ecoNP.Rd
+++ b/man/predict.ecoNP.Rd
@@ -1,84 +1,68 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predict.ecoNP.R
\name{predict.ecoNP}
-
\alias{predict.ecoNP}
-\alias{predict.ecoNPX}
+\title{Out-of-Sample Posterior Prediction under the Nonparametric Bayesian Model
+for Ecological Inference in 2x2 Tables}
+\usage{
+\method{predict}{ecoNP}(object, newdraw = NULL, subset = NULL, obs = NULL,
+ verbose = FALSE, ...)
+}
+\arguments{
+\item{object}{An output object from \code{ecoNP}.}
-\title{Out-of-Sample Posterior Prediction under the
- Nonparametric Bayesian Model for Ecological Inference in 2x2 Tables}
+\item{newdraw}{An optional list containing two matrices (or three
+dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
+and \eqn{\Sigma}. Those elements should be named as \code{mu} and
+\code{Sigma}, respectively. The default is the original MCMC draws stored in
+\code{object}.}
-\description{
- Obtains out-of-sample posterior predictions under the fitted
- nonparametric Bayesian model for ecological
- inference. \code{predict} method for class \code{ecoNP} and \code{ecoNPX}.
-}
+\item{subset}{A scalar or numerical vector specifying the row number(s) of
+\code{mu} and \code{Sigma} in the output object from \code{eco}. If
+specified, the posterior draws of parameters for those rows are used for
+posterior prediction. The default is \code{NULL} where all the posterior
+draws are used.}
-\usage{
- \method{predict}{ecoNP}(object, newdraw = NULL, subset = NULL, obs = NULL,
- verbose = FALSE, ...)
- \method{predict}{ecoNPX}(object, newdraw = NULL, subset = NULL, obs = NULL,
- cond = FALSE, verbose = FALSE, ...)
-}
+\item{obs}{An integer or vector of integers specifying the observation
+number(s) whose posterior draws will be used for predictions. The default is
+\code{NULL} where all the observations in the data set are selected.}
-\arguments{
- \item{object}{An output object from \code{ecoNP}.}
- \item{newdraw}{An optional list containing two matrices (or three
- dimensional arrays for the nonparametric model) of MCMC draws
- of \eqn{\mu} and \eqn{\Sigma}. Those elements should be named as
- \code{mu} and \code{Sigma}, respectively. The default is the
- original MCMC draws stored in \code{object}.
- }
- \item{subset}{A scalar or numerical vector specifying the row
- number(s) of \code{mu} and \code{Sigma} in the output object from
- \code{eco}. If specified, the posterior draws of parameters for
- those rows are used for posterior prediction. The default is
- \code{NULL} where all the posterior draws are used.
- }
- \item{obs}{An integer or vector of integers specifying the observation
- number(s) whose posterior draws will be used for predictions. The
- default is \code{NULL} where all the observations in the data set
- are selected.
- }
- \item{cond}{logical. If \code{TRUE}, then the conditional prediction
- will made for the parametric model with contextual effects. The
- default is \code{FALSE}.
- }
- \item{verbose}{logical. If \code{TRUE}, helpful messages along with a
- progress report on the Monte Carlo sampling from the posterior
- predictive distributions are printed on the screen. The default is
- \code{FALSE}.
- }
- \item{...}{further arguments passed to or from other methods.}
-}
+\item{verbose}{logical. If \code{TRUE}, helpful messages along with a
+progress report on the Monte Carlo sampling from the posterior predictive
+distributions are printed on the screen. The default is \code{FALSE}.}
-\details{The posterior predictive values are computed using the
- Monte Carlo sample stored in the \code{eco} or \code{ecoNP} output
- (or other sample if
- \code{newdraw} is specified). Given each Monte Carlo sample of the
- parameters, we sample the vector-valued latent variable from the
- appropriate multivariate Normal distribution. Then, we apply the
- inverse logit transformation to obtain the predictive values of
- proportions, \eqn{W}. The computation may be slow (especially for the
- nonparametric model) if a large Monte Carlo sample of the model
- parameters is used. In either case, setting \code{verbose = TRUE} may
- be helpful in monitoring the progress of the code.
+\item{...}{further arguments passed to or from other methods.}
}
-
\value{
- \code{predict.eco} yields a matrix of class \code{predict.eco}
- containing the Monte Carlo sample from the posterior predictive
- distribution of inner cells of ecological
- tables. \code{summary.predict.eco} will summarize the output, and
- \code{print.summary.predict.eco} will print the summary.
+\code{predict.eco} yields a matrix of class \code{predict.eco}
+containing the Monte Carlo sample from the posterior predictive distribution
+of inner cells of ecological tables. \code{summary.predict.eco} will
+summarize the output, and \code{print.summary.predict.eco} will print the
+summary.
+}
+\description{
+Obtains out-of-sample posterior predictions under the fitted nonparametric
+Bayesian model for ecological inference. \code{predict} method for class
+\code{ecoNP} and \code{ecoNPX}.
+}
+\details{
+The posterior predictive values are computed using the Monte Carlo sample
+stored in the \code{eco} or \code{ecoNP} output (or other sample if
+\code{newdraw} is specified). Given each Monte Carlo sample of the
+parameters, we sample the vector-valued latent variable from the appropriate
+multivariate Normal distribution. Then, we apply the inverse logit
+transformation to obtain the predictive values of proportions, \eqn{W}. The
+computation may be slow (especially for the nonparametric model) if a large
+Monte Carlo sample of the model parameters is used. In either case, setting
+\code{verbose = TRUE} may be helpful in monitoring the progress of the code.
+}
+\seealso{
+\code{eco}, \code{ecoNP}, \code{summary.eco}, \code{summary.ecoNP}
}
-
-\seealso{\code{eco}, \code{ecoNP}, \code{summary.eco},
- \code{summary.ecoNP}}
-
\author{
- Kosuke Imai, Department of Politics, Princeton University,
- \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu};
- Ying Lu, Center for Promoting Research Involving Innovative Statistical Methodology (PRIISM), New York University
- \email{ying.lu at nyu.Edu}
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}
}
-
\keyword{methods}
diff --git a/man/predict.ecoNPX.Rd b/man/predict.ecoNPX.Rd
new file mode 100644
index 0000000..7847576
--- /dev/null
+++ b/man/predict.ecoNPX.Rd
@@ -0,0 +1,72 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predict.ecoNPX.R
+\name{predict.ecoNPX}
+\alias{predict.ecoNPX}
+\title{Out-of-Sample Posterior Prediction under the Nonparametric Bayesian Model
+for Ecological Inference in 2x2 Tables}
+\usage{
+\method{predict}{ecoNPX}(object, newdraw = NULL, subset = NULL,
+ obs = NULL, cond = FALSE, verbose = FALSE, ...)
+}
+\arguments{
+\item{object}{An output object from \code{ecoNP}.}
+
+\item{newdraw}{An optional list containing two matrices (or three
+dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
+and \eqn{\Sigma}. Those elements should be named as \code{mu} and
+\code{Sigma}, respectively. The default is the original MCMC draws stored in
+\code{object}.}
+
+\item{subset}{A scalar or numerical vector specifying the row number(s) of
+\code{mu} and \code{Sigma} in the output object from \code{eco}. If
+specified, the posterior draws of parameters for those rows are used for
+posterior prediction. The default is \code{NULL} where all the posterior
+draws are used.}
+
+\item{obs}{An integer or vector of integers specifying the observation
+number(s) whose posterior draws will be used for predictions. The default is
+\code{NULL} where all the observations in the data set are selected.}
+
+\item{cond}{logical. If \code{TRUE}, then the conditional prediction will
+be made for the parametric model with contextual effects. The default is
+\code{FALSE}.}
+
+\item{verbose}{logical. If \code{TRUE}, helpful messages along with a
+progress report on the Monte Carlo sampling from the posterior predictive
+distributions are printed on the screen. The default is \code{FALSE}.}
+
+\item{...}{further arguments passed to or from other methods.}
+}
+\value{
+\code{predict.eco} yields a matrix of class \code{predict.eco}
+containing the Monte Carlo sample from the posterior predictive distribution
+of inner cells of ecological tables. \code{summary.predict.eco} will
+summarize the output, and \code{print.summary.predict.eco} will print the
+summary.
+}
+\description{
+Obtains out-of-sample posterior predictions under the fitted nonparametric
+Bayesian model for ecological inference. \code{predict} method for class
+\code{ecoNP} and \code{ecoNPX}.
+}
+\details{
+The posterior predictive values are computed using the Monte Carlo sample
+stored in the \code{eco} or \code{ecoNP} output (or other sample if
+\code{newdraw} is specified). Given each Monte Carlo sample of the
+parameters, we sample the vector-valued latent variable from the appropriate
+multivariate Normal distribution. Then, we apply the inverse logit
+transformation to obtain the predictive values of proportions, \eqn{W}. The
+computation may be slow (especially for the nonparametric model) if a large
+Monte Carlo sample of the model parameters is used. In either case, setting
+\code{verbose = TRUE} may be helpful in monitoring the progress of the code.
+}
+\seealso{
+\code{eco}, \code{ecoNP}, \code{summary.eco}, \code{summary.ecoNP}
+}
+\author{
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}
+}
+\keyword{methods}
diff --git a/man/predict.ecoX.Rd b/man/predict.ecoX.Rd
new file mode 100644
index 0000000..3526676
--- /dev/null
+++ b/man/predict.ecoX.Rd
@@ -0,0 +1,72 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predict.ecoX.R
+\name{predict.ecoX}
+\alias{predict.ecoX}
+\title{Out-of-Sample Posterior Prediction under the Parametric Bayesian Model for
+Ecological Inference in 2x2 Tables}
+\usage{
+\method{predict}{ecoX}(object, newdraw = NULL, subset = NULL,
+ newdata = NULL, cond = FALSE, verbose = FALSE, ...)
+}
+\arguments{
+\item{object}{An output object from \code{eco} or \code{ecoNP}.}
+
+\item{newdraw}{An optional list containing two matrices (or three
+dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
+and \eqn{\Sigma}. Those elements should be named as \code{mu} and
+\code{Sigma}, respectively. The default is the original MCMC draws stored in
+\code{object}.}
+
+\item{subset}{A scalar or numerical vector specifying the row number(s) of
+\code{mu} and \code{Sigma} in the output object from \code{eco}. If
+specified, the posterior draws of parameters for those rows are used for
+posterior prediction. The default is \code{NULL} where all the posterior
+draws are used.}
+
+\item{newdata}{An optional data frame containing a new data set for which
+posterior predictions will be made. The new data set must have the same
+variable names as those in the original data.}
+
+\item{cond}{logical. If \code{TRUE}, then the conditional prediction will
+be made for the parametric model with contextual effects. The default is
+\code{FALSE}.}
+
+\item{verbose}{logical. If \code{TRUE}, helpful messages along with a
+progress report on the Monte Carlo sampling from the posterior predictive
+distributions are printed on the screen. The default is \code{FALSE}.}
+
+\item{...}{further arguments passed to or from other methods.}
+}
+\value{
+\code{predict.eco} yields a matrix of class \code{predict.eco}
+containing the Monte Carlo sample from the posterior predictive distribution
+of inner cells of ecological tables. \code{summary.predict.eco} will
+summarize the output, and \code{print.summary.predict.eco} will print the
+summary.
+}
+\description{
+Obtains out-of-sample posterior predictions under the fitted parametric
+Bayesian model for ecological inference. \code{predict} method for class
+\code{eco} and \code{ecoX}.
+}
+\details{
+The posterior predictive values are computed using the Monte Carlo sample
+stored in the \code{eco} output (or other sample if \code{newdraw} is
+specified). Given each Monte Carlo sample of the parameters, we sample the
+vector-valued latent variable from the appropriate multivariate Normal
+distribution. Then, we apply the inverse logit transformation to obtain the
+predictive values of proportions, \eqn{W}. The computation may be slow
+(especially for the nonparametric model) if a large Monte Carlo sample of
+the model parameters is used. In either case, setting \code{verbose = TRUE}
+may be helpful in monitoring the progress of the code.
+}
+\seealso{
+\code{eco}, \code{predict.ecoNP}
+}
+\author{
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}
+}
+\keyword{methods}
diff --git a/man/print.summary.eco.Rd b/man/print.summary.eco.Rd
new file mode 100644
index 0000000..133c747
--- /dev/null
+++ b/man/print.summary.eco.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/print.summary.eco.R
+\name{print.summary.eco}
+\alias{print.summary.eco}
+\title{Print the Summary of the Results for the Bayesian Parametric Model for Ecological
+Inference in 2x2 Tables}
+\usage{
+\method{print}{summary.eco}(x, digits = max(3, getOption("digits") - 3), ...)
+}
+\arguments{
+\item{x}{An object of class \code{summary.eco}.}
+
+\item{digits}{the number of significant digits to use when printing.}
+
+\item{...}{further arguments passed to or from other methods.}
+}
+\value{
+\code{summary.eco} yields an object of class \code{summary.eco}
+containing the following elements:
+\item{call}{The call from \code{eco}.}
+\item{n.obs}{The number of units.}
+\item{n.draws}{The number of Monte Carlo samples.}
+\item{agg.table}{Aggregate posterior estimates of the marginal
+means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If
+\code{param = TRUE}, the following elements are also included:
+\item{param.table}{Posterior estimates of model parameters: population mean
+estimates of \eqn{W_1} and \eqn{W_2} and their logit transformations.} If
+\code{units = TRUE}, the following elements are also included:
+\item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
+\item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}
+
+This object can be printed by \code{print.summary.eco}
+}
+\description{
+\code{print} method for class \code{summary.eco}.
+}
+\seealso{
+\code{eco}, \code{predict.eco}
+}
+\author{
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}
+}
+\keyword{methods}
diff --git a/man/print.summary.ecoML.Rd b/man/print.summary.ecoML.Rd
new file mode 100644
index 0000000..8349cea
--- /dev/null
+++ b/man/print.summary.ecoML.Rd
@@ -0,0 +1,63 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/print.summary.ecoML.R
+\name{print.summary.ecoML}
+\alias{print.summary.ecoML}
+\title{Print the Summary of the Results for the Maximum Likelihood Parametric Model for
+Ecological Inference in 2x2 Tables}
+\usage{
+\method{print}{summary.ecoML}(x, digits = max(3, getOption("digits") - 3),
+ ...)
+}
+\arguments{
+\item{x}{An object of class \code{summary.ecoML}.}
+
+\item{digits}{the number of significant digits to use when printing.}
+
+\item{...}{further arguments passed to or from other methods.}
+}
+\value{
+\code{summary.ecoML} yields an object of class \code{summary.ecoML}
+containing the following elements:
+\item{call}{The call from \code{ecoML}.}
+\item{sem}{Whether the SEM algorithm was executed, as specified by the user
+upon calling \code{ecoML}.}
+\item{fix.rho}{Whether the correlation parameter was fixed or allowed to vary,
+as specified by the user upon calling \code{ecoML}.}
+\item{epsilon}{The convergence threshold specified by the
+user upon calling \code{ecoML}.}
+\item{n.obs}{The number of units.}
+\item{iters.em}{The number of iterations the EM algorithm cycled through before
+convergence or reaching the maximum number of iterations allowed.}
+\item{iters.sem}{The number of iterations the SEM algorithm cycled through
+before convergence or reaching the maximum number of iterations allowed.}
+\item{loglik}{The final observed log-likelihood.}
+\item{rho}{A matrix of \code{iters.em} rows specifying the correlation parameters
+at each iteration of the EM algorithm. The number of columns depends on how many
+correlation parameters exist in the model. Column order is the same as the order of the
+parameters in \code{param.table}.}
+\item{param.table}{Final estimates of the parameter values for the model.
+Excludes parameters fixed by the user upon calling \code{ecoML}.
+See \code{ecoML} documentation for order of parameters.}
+\item{agg.table}{Aggregate estimates of the marginal means of \eqn{W_1} and \eqn{W_2}}
+\item{agg.wtable}{Aggregate estimates of the marginal means of \eqn{W_1} and \eqn{W_2}
+using \eqn{X} and \eqn{N} as weights.} If \code{units = TRUE}, the following elements
+are also included:
+\item{W.table}{Unit-level estimates for \eqn{W_1} and \eqn{W_2}.}
+
+This object can be printed by \code{print.summary.ecoML}.
+}
+\description{
+\code{print} method for class \code{summary.ecoML}.
+}
+\seealso{
+\code{ecoML}
+}
+\author{
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}; Aaron Strauss,
+Department of Politics, Princeton University,
+\email{abstraus at Princeton.Edu}
+}
+\keyword{methods}
diff --git a/man/print.summary.ecoNP.Rd b/man/print.summary.ecoNP.Rd
new file mode 100644
index 0000000..ad387e2
--- /dev/null
+++ b/man/print.summary.ecoNP.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/print.summary.ecoNP.R
+\name{print.summary.ecoNP}
+\alias{print.summary.ecoNP}
+\title{Print the Summary of the Results for the Bayesian Nonparametric Model for Ecological
+Inference in 2x2 Tables}
+\usage{
+\method{print}{summary.ecoNP}(x, digits = max(3, getOption("digits") - 3),
+ ...)
+}
+\arguments{
+\item{x}{An object of class \code{summary.ecoNP}.}
+
+\item{digits}{the number of significant digits to use when printing.}
+
+\item{...}{further arguments passed to or from other methods.}
+}
+\value{
+\code{summary.ecoNP} yields an object of class \code{summary.ecoNP}
+containing the following elements:
+\item{call}{The call from \code{ecoNP}.}
+\item{n.obs}{The number of units.}
+\item{n.draws}{The number of Monte Carlo samples.}
+\item{agg.table}{Aggregate posterior estimates of the marginal
+means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If
+\code{param = TRUE}, the following elements are also included:
+\item{param.table}{Posterior estimates of model parameters: population mean
+estimates of \eqn{W_1} and \eqn{W_2}. If \code{subset} is specified, only a
+subset of the population parameters are included.} If \code{units = TRUE},
+the following elements are also included:
+\item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
+\item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}
+
+This object can be printed by \code{print.summary.ecoNP}
+}
+\description{
+\code{print} method for class \code{summary.ecoNP}.
+}
+\seealso{
+\code{ecoNP}, \code{predict.eco}
+}
+\author{
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}
+}
+\keyword{methods}
diff --git a/man/reg.Rd b/man/reg.Rd
index 9a86276..b31edbf 100644
--- a/man/reg.Rd
+++ b/man/reg.Rd
@@ -1,33 +1,25 @@
-\name{reg}
-
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reg.R
\docType{data}
-
+\name{reg}
\alias{reg}
-
-\title{Voter Registration in US Southern States}
-
+\title{Voter Registration in US Southern States}
+\format{A data frame containing 5 variables and 275 observations
+\tabular{lll}{ X \tab numeric \tab the fraction of Black voters \cr Y \tab
+numeric \tab the fraction of voters who registered themselves\cr N \tab
+numeric \tab the total number of voters in each county \cr W1 \tab numeric
+\tab the actual fraction of Black voters who registered themselves \cr W2
+\tab numeric \tab the actual fraction of White voters who registered
+themselves }}
\description{
- This data set contains the racial composition, the registration rate,
- the number of eligible voters as well as the actual observed racial
- registration rates for every county in four US southern states:
- Florida, Louisiana, North Carolina, and South Carolina.
-}
-
-\usage{data(reg)}
-
-\format{A data frame containing 5 variables and 275 observations
- \tabular{lll}{
- X \tab numeric \tab the fraction of Black voters \cr
- Y \tab numeric \tab the fraction of voters who registered themselves\cr
- N \tab numeric \tab the total number of voters in each county \cr
- W1 \tab numeric \tab the actual fraction of Black voters who
- registered themselves \cr
- W2 \tab numeric \tab the actual fraction of White voters who registered themselves
- }
+This data set contains the racial composition, the registration rate, the
+number of eligible voters as well as the actual observed racial registration
+rates for every county in four US southern states: Florida, Louisiana, North
+Carolina, and South Carolina.
}
-\references{King, G. (1997). \dQuote{A Solution to the Ecological
- Inference Problem: Reconstructing Individual Behavior from Aggregate
- Data}. Princeton University Press, Princeton, NJ.
+\references{
+King, G. (1997). \dQuote{A Solution to the Ecological Inference
+Problem: Reconstructing Individual Behavior from Aggregate Data}. Princeton
+University Press, Princeton, NJ.
}
-
\keyword{datasets}
diff --git a/man/summary.eco.Rd b/man/summary.eco.Rd
index c3ec181..6ff6655 100644
--- a/man/summary.eco.Rd
+++ b/man/summary.eco.Rd
@@ -1,73 +1,62 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summary.eco.R
\name{summary.eco}
-
\alias{summary.eco}
\alias{print.eco}
-\alias{print.summary.eco}
+\title{Summarizing the Results for the Bayesian Parametric Model for Ecological
+Inference in 2x2 Tables}
+\usage{
+\method{summary}{eco}(object, CI = c(2.5, 97.5), param = TRUE,
+ units = FALSE, subset = NULL, ...)
+}
+\arguments{
+\item{object}{An output object from \code{eco}.}
-\title{Summarizing the Results for the Bayesian Parametric Model for
- Ecological Inference in 2x2 Tables}
+\item{CI}{A vector of lower and upper bounds for the Bayesian credible
+intervals used to summarize the results. The default is the equal tail 95
+percent credible interval.}
-\description{
- \code{summary} method for class \code{eco}.
-}
+\item{param}{Logical. If \code{TRUE}, the posterior estimates of the
+population parameters will be provided. The default value is \code{TRUE}.}
-\usage{
- \method{summary}{eco}(object, CI = c(2.5, 97.5), param = TRUE,
- units = FALSE, subset = NULL, ...)
+\item{units}{Logical. If \code{TRUE}, the in-sample predictions for each
+unit or for a subset of units will be provided. The default value is
+\code{FALSE}.}
- \method{print}{summary.eco}(x, digits = max(3, getOption("digits") - 3), ...)
-}
+\item{subset}{A numeric vector indicating the subset of the units whose
+in-sample predictions are to be provided when \code{units} is \code{TRUE}. The
+default value is \code{NULL} where the in-sample predictions for each unit
+will be provided.}
-\arguments{
- \item{object}{An output object from \code{eco}.}
- \item{CI}{A vector of lower and upper bounds for the Bayesian credible
- intervals used to summarize the results. The default is the
- equal tail 95 percent credible interval.
- }
- \item{x}{An object of class \code{summary.eco}.}
- \item{digits}{the number of significant digits to use when printing.}
- \item{param}{Logical. If \code{TRUE}, the posterior estimates of the
- population parameters will be provided. The default value is
- \code{TRUE}.
- }
- \item{units}{Logical. If \code{TRUE}, the in-sample predictions for
- each unit or for a subset of units will be provided. The default
- value is \code{FALSE}.
- }
- \item{subset}{A numeric vector indicating the subset of the units whose
- in-sample predications to be provided when \code{units} is
- \code{TRUE}. The default value is \code{NULL} where the in-sample
- predictions for each unit will be provided.
- }
- \item{...}{further arguments passed to or from other methods.}
+\item{...}{further arguments passed to or from other methods.}
}
-
\value{
- \code{summary.eco} yields an object of class \code{summary.eco}
- containing the following elements:
- \item{call}{The call from \code{eco}.}
- \item{n.obs}{The number of units.}
- \item{n.draws}{The number of Monte Carlo samples.}
- \item{agg.table}{Aggregate posterior estimates of the marginal means
- of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.}
- If \code{param = TRUE}, the following elements are also included:
- \item{param.table}{Posterior estimates of model parameters: population
- mean estimates of \eqn{W_1} and \eqn{W_2} and their logit
- transformations.}
- If \code{units = TRUE}, the following elements are also included:
- \item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
- \item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}
-
- This object can be printed by \code{print.summary.eco}
+\code{summary.eco} yields an object of class \code{summary.eco}
+containing the following elements:
+\item{call}{The call from \code{eco}.}
+\item{n.obs}{The number of units.}
+\item{n.draws}{The number of Monte Carlo samples.}
+\item{agg.table}{Aggregate posterior estimates of the marginal
+means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If
+\code{param = TRUE}, the following elements are also included:
+\item{param.table}{Posterior estimates of model parameters: population mean
+estimates of \eqn{W_1} and \eqn{W_2} and their logit transformations.} If
+\code{units = TRUE}, the following elements are also included:
+\item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
+\item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}
+
+This object can be printed by \code{print.summary.eco}
+}
+\description{
+\code{summary} method for class \code{eco}.
+}
+\seealso{
+\code{eco}, \code{predict.eco}
}
-
-\seealso{\code{eco}, \code{predict.eco}}
-
\author{
- Kosuke Imai, Department of Politics, Princeton University,
- \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu};
- Ying Lu, Center for Promoting Research Involving Innovative Statistical Methodology (PRIISM), New York University
- \email{ying.lu at nyu.Edu}
- }
-
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}
+}
\keyword{methods}
diff --git a/man/summary.ecoML.Rd b/man/summary.ecoML.Rd
index d26047e..e4f9088 100644
--- a/man/summary.ecoML.Rd
+++ b/man/summary.ecoML.Rd
@@ -1,88 +1,77 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summary.ecoML.R
\name{summary.ecoML}
-
\alias{summary.ecoML}
-\alias{print.summary.ecoML}
-
\title{Summarizing the Results for the Maximum Likelihood Parametric Model for
- Ecological Inference in 2x2 Tables}
-
-\description{
- \code{summary} method for class \code{eco}.
+Ecological Inference in 2x2 Tables}
+\usage{
+\method{summary}{ecoML}(object, CI = c(2.5, 97.5), param = TRUE,
+ units = FALSE, subset = NULL, ...)
}
+\arguments{
+\item{object}{An output object from \code{ecoML}.}
-\usage{
- \method{summary}{ecoML}(object, CI = c(2.5, 97.5), param = TRUE, units = FALSE,
- subset = NULL, ...)
+\item{CI}{A vector of lower and upper bounds for the Bayesian credible
+intervals used to summarize the results. The default is the equal tail 95
+percent credible interval.}
- \method{print}{summary.ecoML}(x, digits = max(3, getOption("digits") - 3), ...)
-}
+\item{param}{Ignored.}
-\arguments{
- \item{object}{An output object from \code{eco}.}
- \item{CI}{A vector of lower and upper bounds for the Bayesian credible
- intervals used to summarize the results. The default is the
- equal tail 95 percent credible interval.
- }
- \item{param}{Ignored.}
- \item{subset}{A numeric vector indicating the subset of the units whose
- in-sample predications to be provided when \code{units} is
- \code{TRUE}. The default value is \code{NULL} where the in-sample
- predictions for each unit will be provided.
- }
- \item{units}{Logical. If \code{TRUE}, the in-sample predictions for
- each unit or for a subset of units will be provided. The default
- value is \code{FALSE}.
- }
-
- \item{x}{An object of class \code{summary.ecoML}.}
- \item{digits}{the number of significant digits to use when printing.}
-
- \item{...}{further arguments passed to or from other methods.}
-}
+\item{units}{Logical. If \code{TRUE}, the in-sample predictions for each
+unit or for a subset of units will be provided. The default value is
+\code{FALSE}.}
-\value{
- \code{summary.eco} yields an object of class \code{summary.eco}
- containing the following elements:
- \item{call}{The call from \code{eco}.}
- \item{sem}{Whether the SEM algorithm was executed, as specified by the
- user upon calling \code{ecoML}.}
- \item{fix.rho}{Whether the correlation parameter was fixed or allowed to
- vary, as specified by the user upon calling \code{ecoML}.}
- \item{epsilon}{The convergence threshold specified by the user upon
- calling \code{ecoML}.}
- \item{n.obs}{The number of units.}
- \item{iters.em}{The number iterations the EM algorithm cycled through
- before convergence or reaching the maximum number of iterations
- allowed.}
- \item{iters.sem}{The number iterations the SEM algorithm cycled through
- before convergence or reaching the maximum number of iterations
- allowed.}
- \item{loglik}{The final observed log-likelihood.}
- \item{rho}{A matrix of \code{iters.em} rows specifying the correlation
- parameters at each iteration of the EM algorithm. The number of columns
- depends on how many correlation parameters exist in the model. Column
- order is the same as the order of the parameters in \code{param.table}.}
- \item{param.table}{Final estimates of the parameter values for the model.
- Excludes parameters fixed by the user upon calling \code{ecoML}.
- See \code{ecoML} documentation for order of parameters.}
- \item{agg.table}{Aggregate estimates of the marginal means
- of \eqn{W_1} and \eqn{W_2}}
- \item{agg.wtable}{Aggregate estimates of the marginal means
- of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.}
- If \code{units = TRUE}, the following elements are also included:
- \item{W.table}{Unit-level estimates for \eqn{W_1} and \eqn{W_2}.}
+\item{subset}{A numeric vector indicating the subset of the units whose
+in-sample predictions are to be provided when \code{units} is \code{TRUE}. The
+default value is \code{NULL} where the in-sample predictions for each unit
+will be provided.}
- This object can be printed by \code{print.summary.eco}
+\item{...}{further arguments passed to or from other methods.}
}
+\value{
+\code{summary.ecoML} yields an object of class \code{summary.ecoML}
+containing the following elements:
+\item{call}{The call from \code{ecoML}.}
+\item{sem}{Whether the SEM algorithm was executed, as specified by the user
+upon calling \code{ecoML}.}
+\item{fix.rho}{Whether the correlation parameter was fixed or allowed to
+vary, as specified by the user upon calling \code{ecoML}.}
+\item{epsilon}{The convergence threshold specified by the user upon
+calling \code{ecoML}.}
+\item{n.obs}{The number of units.}
+\item{iters.em}{The number of iterations the EM algorithm cycled through before
+convergence or reaching the maximum number of iterations allowed.}
+\item{iters.sem}{The number of iterations the SEM algorithm cycled through
+before convergence or reaching the maximum number of iterations allowed.}
+\item{loglik}{The final observed log-likelihood.}
+\item{rho}{A matrix of \code{iters.em} rows specifying the correlation parameters at each iteration
+of the EM algorithm. The number of columns depends on how many correlation
+parameters exist in the model. Column order is the same as the order of the
+parameters in \code{param.table}.}
+\item{param.table}{Final estimates of the parameter values for the model.
+Excludes parameters fixed by the user upon calling \code{ecoML}.
+See \code{ecoML} documentation for order of parameters.}
+\item{agg.table}{Aggregate estimates of the marginal means of
+\eqn{W_1} and \eqn{W_2}}
+\item{agg.wtable}{Aggregate estimates of the marginal means
+of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.}
+If \code{units = TRUE}, the following elements are also included:
+\item{W.table}{Unit-level estimates for \eqn{W_1} and \eqn{W_2}.}
-\seealso{\code{ecoML}}
-
+This object can be printed by \code{print.summary.ecoML}.
+}
+\description{
+\code{summary} method for class \code{ecoML}.
+}
+\seealso{
+\code{ecoML}
+}
\author{
- Kosuke Imai, Department of Politics, Princeton University,
- \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu};
- Ying Lu, Center for Promoting Research Involving Innovative Statistical Methodology (PRIISM), New York University
- \email{ying.lu at nyu.Edu}; Aaron Strauss, Department of Politics,
- Princeton University, \email{abstraus at Princeton.Edu}
- }
-
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}; Aaron Strauss,
+Department of Politics, Princeton University,
+\email{abstraus at Princeton.Edu}
+}
\keyword{methods}
diff --git a/man/summary.ecoNP.Rd b/man/summary.ecoNP.Rd
index 66f5943..57e3fb4 100644
--- a/man/summary.ecoNP.Rd
+++ b/man/summary.ecoNP.Rd
@@ -1,71 +1,62 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summary.ecoNP.R
\name{summary.ecoNP}
-
\alias{summary.ecoNP}
-\alias{print.summary.ecoNP}
+\title{Summarizing the Results for the Bayesian Nonparametric Model for Ecological
+Inference in 2x2 Tables}
+\usage{
+\method{summary}{ecoNP}(object, CI = c(2.5, 97.5), param = FALSE,
+ units = FALSE, subset = NULL, ...)
+}
+\arguments{
+\item{object}{An output object from \code{ecoNP}.}
-\title{Summarizing the Results for the Bayesian Nonparametric Model for
-Ecological Inference in 2x2 Tables }
+\item{CI}{A vector of lower and upper bounds for the Bayesian credible
+intervals used to summarize the results. The default is the equal tail 95
+percent credible interval.}
-\description{
- \code{summary} method for class \code{ecoNP}.
-}
+\item{param}{Logical. If \code{TRUE}, the posterior estimates of the
+population parameters will be provided. The default value is \code{FALSE}.}
-\usage{
- \method{summary}{ecoNP}(object, CI = c(2.5, 97.5), param = FALSE,
- units = FALSE, subset = NULL, ...)
+\item{units}{Logical. If \code{TRUE}, the in-sample predictions for each
+unit or for a subset of units will be provided. The default value is
+\code{FALSE}.}
- \method{print}{summary.ecoNP}(x, digits = max(3, getOption("digits") - 3), ...)
-}
+\item{subset}{A numeric vector indicating the subset of the units whose
+in-sample predictions are to be provided when \code{units} is \code{TRUE}. The
+default value is \code{NULL} where the in-sample predictions for each unit
+will be provided.}
-\arguments{
- \item{object}{An output object from \code{ecoNP}.}
- \item{CI}{A vector of lower and upper bounds for the Bayesian credible
- intervals used to summarize the results. The default is the equal
- tail 95 percent credible interval.
- }
- \item{x}{An object of class \code{summary.ecoNP}.}
- \item{digits}{the number of significant digits to use when printing.}
- \item{param}{Logical. If \code{TRUE}, the posterior estimates of the
- population parameters will be provided. The default value is
- \code{FALSE}.
- }
- \item{units}{Logical. If \code{TRUE}, the in-sample predictions for
- each unit or for a subset of units will be provided. The default
- value is \code{FALSE}.
- }
- \item{subset}{A numeric vector indicating the subset of the units whose
- in-sample predications to be provided when \code{units} is
- \code{TRUE}. The default value is \code{NULL} where the in-sample
- predictions for each unit will be provided.}
- \item{...}{further arguments passed to or from other methods.}
+\item{...}{further arguments passed to or from other methods.}
}
-
\value{
- \code{summary.ecoNP} yields an object of class \code{summary.ecoNP}
- containing the following elements:
- \item{call}{The call from \code{ecoNP}.}
- \item{n.obs}{The number of units.}
- \item{n.draws}{The number of Monte Carlo samples.}
- \item{agg.table}{Aggregate posterior estimates of the marginal means
- of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.}
- If \code{param = TRUE}, the following elements are also included:
- \item{param.table}{Posterior estimates of model parameters: population
- mean estimates of \eqn{W_1} and \eqn{W_2}. If \code{subset} is
- specified, only a subset of the population parameters are included.}
- If \code{unit = TRUE}, the following elements are also included:
- \item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
- \item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}
-
- This object can be printed by \code{print.summary.ecoNP}
+\code{summary.ecoNP} yields an object of class \code{summary.ecoNP}
+containing the following elements:
+\item{call}{The call from \code{ecoNP}.}
+\item{n.obs}{The number of units.}
+\item{n.draws}{The number of Monte Carlo samples.}
+\item{agg.table}{Aggregate posterior estimates of the marginal
+means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If
+\code{param = TRUE}, the following elements are also included:
+\item{param.table}{Posterior estimates of model parameters: population mean
+estimates of \eqn{W_1} and \eqn{W_2}. If \code{subset} is specified, only a
+subset of the population parameters are included.} If \code{units = TRUE},
+the following elements are also included:
+\item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
+\item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}
+
+This object can be printed by \code{print.summary.ecoNP}
+}
+\description{
+\code{summary} method for class \code{ecoNP}.
+}
+\seealso{
+\code{ecoNP}, \code{predict.eco}
}
-
-\seealso{\code{ecoNP}, \code{predict.eco}}
-
\author{
- Kosuke Imai, Department of Politics, Princeton University,
- \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu};
- Ying Lu, Center for Promoting Research Involving Innovative Statistical Methodology (PRIISM), New York University
- \email{ying.lu at nyu.Edu}
+Kosuke Imai, Department of Politics, Princeton University,
+\email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu}; Ying Lu,
+Center for Promoting Research Involving Innovative Statistical Methodology
+(PRIISM), New York University \email{ying.lu at nyu.Edu}
}
-
\keyword{methods}
diff --git a/man/wallace.Rd b/man/wallace.Rd
index 06171ed..60b8412 100644
--- a/man/wallace.Rd
+++ b/man/wallace.Rd
@@ -1,37 +1,28 @@
-\name{wallace}
-
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wallace.R
\docType{data}
-
+\name{wallace}
\alias{wallace}
-
\title{Black voting rates for Wallace for President, 1968}
-
+\format{A data frame containing 3 variables and 1009 observations
+\tabular{lll}{
+ X \tab numeric \tab proportion of the population that is Black \cr
+ Y \tab numeric \tab proportion presidential votes cast for Wallace \cr
+ FIPS \tab numeric \tab the FIPS county code
+ }}
\description{
- This data set contains, on a county level, the proportion of
- county residents who are Black and the proportion of presidential
- votes cast for Wallace. Demographic data is based on the 1960
- census. Presidential returns are from ICPSR study 13. County data
- from 10 southern states (Alabama, Arkansas, Georgia, Florida,
- Louisiana, Mississippi, North Carolina, South Carolina, Tennessee,
- Texas) are included. (Virginia is excluded due
- to the difficulty of matching counties between the datasets.)
- This data is analyzed in Wallace and Segal (1973).
+This data set contains, on a county level, the proportion of county
+residents who are Black and the proportion of presidential votes cast for
+Wallace. Demographic data is based on the 1960 census. Presidential returns
+are from ICPSR study 13. County data from 10 southern states (Alabama,
+Arkansas, Georgia, Florida, Louisiana, Mississippi, North Carolina, South
+Carolina, Tennessee, Texas) are included. (Virginia is excluded due to the
+difficulty of matching counties between the datasets.) This data is
+analyzed in Wasserman and Segal (1973).
}
-
-\usage{data(wallace)}
-
-\format{A data frame containing 3 variables and 1009 observations
- \tabular{lll}{
- X \tab numeric \tab proportion of the population that is Black \cr
- Y \tab numeric \tab proportion presidential votes cast for Wallace \cr
- FIPS \tab numeric \tab the FIPS county code
- }
+\references{
+Wasserman, Ira M. and David R. Segal (1973). ``Aggregation
+Effects in the Ecological Study of Presidential Voting.'' American Journal
+of Political Science. vol. 17, pp. 177-81.
}
-
- \references{ Wasserman, Ira M. and David R. Segal (1973).
-``Aggregation Effects in the Ecological Study of Presidential
-Voting.'' American Journal of Political Science. vol. 17, pp.
-177-81.
- }
-
\keyword{datasets}
diff --git a/src/gibbsEM.c b/src/gibbsEM.c
index ad24492..4ddaabf 100644
--- a/src/gibbsEM.c
+++ b/src/gibbsEM.c
@@ -356,7 +356,7 @@ void initTheta(double* pdTheta_in,Param* params, double* pdTheta) {
void ecoEStep(Param* params, double* suff) {
- int t_samp,n_samp,s_samp,x1_samp,x0_samp,i,j,temp0,temp1, verbose;
+ int t_samp,n_samp,s_samp,x1_samp,x0_samp,i,j, verbose;
double loglik,testdens;
Param* param; setParam* setP; caseParam* caseP;
setP=params[0].setP;
@@ -1001,7 +1001,7 @@ void initCCAR(Param* params, double* pdTheta) {
if (!setP_sem.semDone[i]) { //we're not done with this row
//step 1: set phi^t_i
if (verbose>=2) Rprintf("Theta(%d):",(i+1));
- int switch_index_ir=0; int switch_index_it;
+ int switch_index_ir=0; int switch_index_it=0;
for(j=0;j<param_len;j++) {
if (!setP_sem.varParam[j]) //const
phiTI[j]=optTheta[j];
@@ -1212,7 +1212,7 @@ void initCCAR(Param* params, double* pdTheta) {
* finalTheta: 1 if this is for the final theta -- include static variables
**/
void printColumnHeader(int main_loop, int iteration_max, setParam* setP, int finalTheta) {
- int i;
+
int param_len;
param_len = setP->param_len;
diff --git a/src/gibbsXBase.c b/src/gibbsXBase.c
index c4bfffe..d6148af 100644
--- a/src/gibbsXBase.c
+++ b/src/gibbsXBase.c
@@ -102,7 +102,7 @@ void cBaseecoX(
double **InvSigma_w = doubleMatrix(n_dim,n_dim);
/* misc variables */
- int i, j, k, t, main_loop; /* used for various loops */
+ int i, j, k, main_loop; /* used for various loops */
int itemp, itempS, itempC, itempA;
int progress = 1, itempP = ftrunc((double) *n_gen/10);
double dtemp, dtemp1;
diff --git a/src/gibbsZBase.c b/src/gibbsZBase.c
index cff8ac5..1ea562c 100644
--- a/src/gibbsZBase.c
+++ b/src/gibbsZBase.c
@@ -118,7 +118,7 @@ void cBaseecoZ(
double **R = doubleMatrix(n_dim, n_dim); /* ee' */
/* misc variables */
- int i, j, k, t, l, main_loop; /* used for various loops */
+ int i, j, k, l, main_loop; /* used for various loops */
int itemp;
int itempA=0; /* counter for alpha */
int itempB=0;
diff --git a/src/init.c b/src/init.c
new file mode 100644
index 0000000..6ee0134
--- /dev/null
+++ b/src/init.c
@@ -0,0 +1,40 @@
+#include <stdlib.h> // for NULL
+#include <R_ext/Rdynload.h>
+
+/* FIXME:
+ Check these declarations against the C/Fortran source code.
+*/
+
+/* .C calls */
+extern void cBase2C(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void cBaseeco(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void cBaseecoX(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void cBaseecoZ(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void cBaseRC(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void cDPeco(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void cDPecoX(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void cEMeco(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void preBaseX(void *, void *, void *, void *, void *, void *, void *);
+extern void preDP(void *, void *, void *, void *, void *, void *, void *);
+extern void preDPX(void *, void *, void *, void *, void *, void *, void *, void *);
+
+static const R_CMethodDef CEntries[] = {
+ {"cBase2C", (DL_FUNC) &cBase2C, 22},
+ {"cBaseeco", (DL_FUNC) &cBaseeco, 32},
+ {"cBaseecoX", (DL_FUNC) &cBaseecoX, 36},
+ {"cBaseecoZ", (DL_FUNC) &cBaseecoZ, 29},
+ {"cBaseRC", (DL_FUNC) &cBaseRC, 23},
+ {"cDPeco", (DL_FUNC) &cDPeco, 36},
+ {"cDPecoX", (DL_FUNC) &cDPecoX, 40},
+ {"cEMeco", (DL_FUNC) &cEMeco, 27},
+ {"preBaseX", (DL_FUNC) &preBaseX, 7},
+ {"preDP", (DL_FUNC) &preDP, 7},
+ {"preDPX", (DL_FUNC) &preDPX, 8},
+ {NULL, NULL, 0}
+};
+
+void R_init_eco(DllInfo *dll) /* registers the CEntries table for the DLL described by dll; per R's R_init_<library> naming convention this is expected to run when the shared object is loaded */
+{
+ R_registerRoutines(dll, CEntries, NULL, NULL, NULL); /* only the first routine table is supplied; the three remaining table arguments are NULL */
+ R_useDynamicSymbols(dll, FALSE); /* FALSE disables dynamic symbol lookup for this DLL, leaving only the registered routines resolvable */
+}
diff --git a/src/preBaseX.c b/src/preBaseX.c
index 4c475f1..582a388 100644
--- a/src/preBaseX.c
+++ b/src/preBaseX.c
@@ -31,7 +31,7 @@ void preBaseX(
double **Sigma = doubleMatrix(n_dim, n_dim); /* The covariance matrix */
/* misc variables */
- int i, j, k, main_loop; /* used for various loops */
+ int i, j, main_loop; /* used for various loops */
int itemp=0;
int itempM=0;
int itempS=0;
diff --git a/src/preDPX.c b/src/preDPX.c
index 9004364..c1c5f4c 100644
--- a/src/preDPX.c
+++ b/src/preDPX.c
@@ -32,7 +32,7 @@ void preDPX(
double **Sigma = doubleMatrix(n_dim, n_dim); /* The covariance matrix */
/* misc variables */
- int i, j, k, main_loop; /* used for various loops */
+ int i, j, main_loop; /* used for various loops */
int itemp = 0;
int itempM = 0;
int itempS = 0;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/r-cran-eco.git
More information about the debian-science-commits
mailing list