[r-cran-eco] 09/30: Import Upstream version 3.1-1
Andreas Tille
tille at debian.org
Thu Sep 7 07:20:58 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-eco.
commit 18c20a4ab8c32c908a2c83b4c211e7841090592a
Author: Andreas Tille <tille at debian.org>
Date: Thu Sep 7 08:59:23 2017 +0200
Import Upstream version 3.1-1
---
DESCRIPTION | 28 +-
R/eco.R | 7 +-
R/ecoBD.R | 48 +-
R/ecoNP.R | 8 +-
R/emeco.R | 8 +-
R/print.summary.eco.R | 10 +-
R/print.summary.ecoML.R | 8 +-
R/print.summary.ecoNP.R | 18 +-
R/print.summary.predict.eco.R | 2 +-
man/eco.Rd | 94 ++--
man/ecoBD.Rd | 32 +-
man/ecoML.Rd | 51 +-
man/ecoNP.Rd | 65 ++-
man/predict.ecoNP.Rd | 2 +-
man/summary.ecoML.Rd | 5 +-
src/bayes.c | 24 +-
src/fintegrate.c | 328 +++++++------
src/gibbsBase.c | 46 +-
src/gibbsDP.c | 167 +++++--
src/gibbsEM.c | 1062 ++++++++++++++++++++++-------------------
src/macros.h | 24 +-
src/rand.c | 72 +--
22 files changed, 1232 insertions(+), 877 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index 61abba2..e44c795 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,27 +1,27 @@
Package: eco
-Version: 3.0-2
-Date: 2007-1-11
+Version: 3.1-1
+Date: 2007-6-27
Title: R Package for Ecological Inference in 2x2 Tables
Author: Kosuke Imai <kimai at princeton.edu>,
Ying Lu <ying.lu at colorado.edu>,
Aaron Strauss <abstraus at Princeton.EDU>.
-Maintainer: Kosuke Imai <kimai at princeton.edu>
+Maintainer: Ying Lu <ying.lu at colorado.edu>
Depends: R (>= 2.0), MASS
Description: eco is a publicly available R package that implements the
- Bayesian and likelihood methods proposed in Imai, Lu, and Strauss (2006)
- for ecological inference in $2 \times 2$ tables as well as the
- method of bounds introduced by Duncan and Davis (1953). The package
- fits both parametric and nonparametric models using either the
- Expectation-Maximization algorithms (for likelihood models) or the
- Markov chain Monte Carlo algorithms (for Bayesian models). For all
- models, the individual-level data can be directly incorporated into
- the estimation whenever such data are available. Along with
- in-sample and out-of-sample predictions, the package also provides a
- functionality which allows one to quantify the effect of data
+ Bayesian and likelihood methods proposed in Imai, Lu, and Strauss
+ (Forthcoming) for ecological inference in $2 \times 2$ tables as
+ well as the method of bounds introduced by Duncan and Davis (1953).
+ The package fits both parametric and nonparametric models using
+ either the Expectation-Maximization algorithms (for likelihood
+ models) or the Markov chain Monte Carlo algorithms (for Bayesian
+ models). For all models, the individual-level data can be directly
+ incorporated into the estimation whenever such data are available.
+ Along with in-sample and out-of-sample predictions, the package also
+ provides a functionality which allows one to quantify the effect of data
aggregation on parameter estimation and hypothesis testing under the
parametric likelihood models.
LazyLoad: yes
LazyData: yes
License: GPL (version 2 or later)
URL: http://imai.princeton.edu/research/eco.html
-Packaged: Thu Jan 11 08:41:29 2007; kimai
+Packaged: Wed Jun 27 20:20:08 2007; kimai
diff --git a/R/eco.R b/R/eco.R
index 851a99d..2bd3b32 100644
--- a/R/eco.R
+++ b/R/eco.R
@@ -47,6 +47,9 @@ eco <- function(formula, data = parent.frame(), N = NULL, supplement = NULL,
# check data and modify inputs
tmp <- checkdata(X,Y, supplement, ndim)
bdd <- ecoBD(formula=formula, data=data)
+ W1min <- bdd$Wmin[order(tmp$order.old)[1:nrow(tmp$d)],1,1]
+ W1max <- bdd$Wmax[order(tmp$order.old)[1:nrow(tmp$d)],1,1]
+
## fitting the model
n.store <- floor((n.draws-burnin)/(thin+1))
@@ -64,7 +67,7 @@ eco <- function(formula, data = parent.frame(), N = NULL, supplement = NULL,
as.integer(tmp$X1type), as.integer(tmp$samp.X1),
as.double(tmp$X1.W1), as.integer(tmp$X0type),
as.integer(tmp$samp.X0), as.double(tmp$X0.W2),
- as.double(bdd$Wmin[,1,1]), as.double(bdd$Wmax[,1,1]),
+ as.double(W1min), as.double(W1max),
as.integer(parameter), as.integer(grid),
pdSMu0 = double(n.store), pdSMu1 = double(n.store), pdSMu2 = double(n.store),
pdSSig00=double(n.store), pdSSig01=double(n.store), pdSSig02=double(n.store),
@@ -80,7 +83,7 @@ eco <- function(formula, data = parent.frame(), N = NULL, supplement = NULL,
as.integer(tmp$X1type), as.integer(tmp$samp.X1),
as.double(tmp$X1.W1), as.integer(tmp$X0type),
as.integer(tmp$samp.X0), as.double(tmp$X0.W2),
- as.double(bdd$Wmin[,1,1]), as.double(bdd$Wmax[,1,1]),
+ as.double(W1min), as.double(W1max),
as.integer(parameter), as.integer(grid),
pdSMu0=double(n.store), pdSMu1=double(n.store),
pdSSig00=double(n.store),
diff --git a/R/ecoBD.R b/R/ecoBD.R
index ce2835d..ab73696 100644
--- a/R/ecoBD.R
+++ b/R/ecoBD.R
@@ -2,6 +2,8 @@ ecoBD <- function(formula, data = parent.frame(), N=NULL){
mf <- match.call()
tt <- terms(formula)
attr(tt, "intercept") <- 0
+ vnames <- attr(tt, "variables")
+ vnamesR <- vnames[[2]]
if (is.matrix(eval.parent(mf$data)))
data <- as.data.frame(data)
@@ -29,10 +31,27 @@ ecoBD <- function(formula, data = parent.frame(), N=NULL){
R <- ncol(Y)
Wmin <- Wmax <- Nmin <- Nmax <- array(NA, c(n.obs, R, C))
clab <- rlab <- NULL
- for (j in 1:C)
- clab <- c(clab, paste("c", j, sep=""))
+ if (length(vnames) == 3)
+ clab <- c(vnames[[3]], paste("not",vnames[[3]]))
+ else {
+ for (j in 1:C) {
+ if ((j == C) & (length(vnames) < j+2))
+ clab <- c(clab, "other")
+ else
+ clab <- c(clab, vnames[[j+2]])
+ }
+ }
+ if (length(vnamesR) == 1)
+ rlab <- c(vnamesR, paste("not",vnamesR))
+ else {
+ for (i in 1:R) {
+ if ((i == R) & (length(vnamesR) < i+1))
+ rlab <- c(rlab, "other")
+ else
+ rlab <- c(rlab, vnamesR[[i]])
+ }
+ }
for (i in 1:R) {
- rlab <- c(rlab, paste("r", i, sep=""))
for (j in 1:C) {
Nmin[,i,j] <- apply(cbind(0, X[,j]+Y[,i]-N), 1, max)
Nmax[,i,j] <- apply(cbind(Y[,i], X[,j]), 1, min)
@@ -58,10 +77,27 @@ ecoBD <- function(formula, data = parent.frame(), N=NULL){
R <- ncol(Y)
Wmin <- Wmax <- array(NA, c(n.obs, R, C))
clab <- rlab <- NULL
- for (j in 1:C)
- clab <- c(clab, paste("c", j, sep=""))
+ if (length(vnames) == 3)
+ clab <- c(vnames[[3]], paste("not",vnames[[3]]))
+ else {
+ for (j in 1:C) {
+ if ((j == C) & (length(vnames) < j+2))
+ clab <- c(clab, "other")
+ else
+ clab <- c(clab, vnames[[j+2]])
+ }
+ }
+ if (length(vnamesR) == 1)
+ rlab <- c(vnamesR, paste("not",vnamesR))
+ else {
+ for (i in 1:R) {
+ if ((i == R) & (length(vnamesR) < i+1))
+ rlab <- c(rlab, "other")
+ else
+ rlab <- c(rlab, vnamesR[[i]])
+ }
+ }
for (i in 1:R) {
- rlab <- c(rlab, paste("r", i, sep=""))
for (j in 1:C) {
Wmin[,i,j] <- apply(cbind(0, (X[,j]+Y[,i]-1)/X[,j]), 1, max)
Wmax[,i,j] <- apply(cbind(1, Y[,i]/X[,j]), 1, min)
diff --git a/R/ecoNP.R b/R/ecoNP.R
index a359e89..fb32210 100644
--- a/R/ecoNP.R
+++ b/R/ecoNP.R
@@ -47,7 +47,9 @@ ecoNP <- function(formula, data = parent.frame(), N = NULL, supplement = NULL,
## checking the data and calculating the bounds
tmp <- checkdata(X, Y, supplement, ndim)
bdd <- ecoBD(formula, data=data)
-
+ W1min <- bdd$Wmin[order(tmp$order.old)[1:nrow(tmp$d)],1,1]
+ W1max <- bdd$Wmax[order(tmp$order.old)[1:nrow(tmp$d)],1,1]
+
## fitting the model
n.store <- floor((n.draws-burnin)/(thin+1))
unit.par <- unit.w <- tmp$n.samp+tmp$samp.X1+tmp$samp.X0
@@ -66,7 +68,7 @@ ecoNP <- function(formula, data = parent.frame(), N = NULL, supplement = NULL,
as.integer(tmp$samp.X1), as.double(tmp$X1.W1),
as.integer(tmp$X0type), as.integer(tmp$samp.X0),
as.double(tmp$X0.W2),
- as.double(bdd$Wmin[,1,1]), as.double(bdd$Wmax[,1,1]),
+ as.double(W1min), as.double(W1max),
as.integer(parameter), as.integer(grid),
pdSMu0=double(n.par), pdSMu1=double(n.par),
pdSMu2=double(n.par),
@@ -86,7 +88,7 @@ ecoNP <- function(formula, data = parent.frame(), N = NULL, supplement = NULL,
as.integer(tmp$samp.X1), as.double(tmp$X1.W1),
as.integer(tmp$X0type), as.integer(tmp$samp.X0),
as.double(tmp$X0.W2),
- as.double(bdd$Wmin[,1,1]), as.double(bdd$Wmax[,1,1]),
+ as.double(W1min), as.double(W1max),
as.integer(parameter), as.integer(grid),
pdSMu0=double(n.par), pdSMu1=double(n.par),
pdSSig00=double(n.par), pdSSig01=double(n.par),
diff --git a/R/emeco.R b/R/emeco.R
index 9eabaeb..139d8e3 100644
--- a/R/emeco.R
+++ b/R/emeco.R
@@ -5,7 +5,7 @@
ecoML <- function(formula, data = parent.frame(), N=NULL, supplement = NULL,
theta.start = c(0,0,1,1,0), fix.rho = FALSE,
context = FALSE, sem = TRUE, epsilon=10^(-10),
- maxit = 1000, loglik = TRUE, hyptest=FALSE, verbose= TRUE) {
+ maxit = 1000, loglik = TRUE, hyptest=FALSE, verbose= FALSE) {
## getting X and Y
@@ -43,6 +43,10 @@ ecoML <- function(formula, data = parent.frame(), N=NULL, supplement = NULL,
##checking data
tmp <- checkdata(X, Y, supplement, ndim)
bdd <- ecoBD(formula=formula, data=data)
+ W1min <- bdd$Wmin[order(tmp$order.old)[1:nrow(tmp$d)],1,1]
+ W1max <- bdd$Wmax[order(tmp$order.old)[1:nrow(tmp$d)],1,1]
+
+
n <- tmp$n.samp+tmp$survey.samp+tmp$samp.X1+tmp$samp.X0
wcol<-ndim
if (context) {
@@ -61,7 +65,7 @@ ecoML <- function(formula, data = parent.frame(), N=NULL, supplement = NULL,
as.double(tmp$survey.data),
as.integer(tmp$X1type), as.integer(tmp$samp.X1), as.double(tmp$X1.W1),
as.integer(tmp$X0type), as.integer(tmp$samp.X0), as.double(tmp$X0.W2),
- as.double(bdd$Wmin[,1,1]), as.double(bdd$Wmax[,1,1]),
+ as.double(W1min), as.double(W1max),
as.integer(flag),as.integer(verbose),as.integer(loglik),as.integer(hyptest),
optTheta=rep(-1.1,n.var), pdTheta=double(n.var),
S=double(n.S+1),inSample=double(inSample.length),DMmatrix=double(n.par*n.par),
diff --git a/R/print.summary.eco.R b/R/print.summary.eco.R
index a51a4e4..ceddccc 100644
--- a/R/print.summary.eco.R
+++ b/R/print.summary.eco.R
@@ -5,17 +5,17 @@ print.summary.eco <- function(x, digits=max(3, getOption("digits")-3), ...) {
cat("\n")
if (!is.null(x$param.table)) {
cat("\nParameter Estimates:\n")
- printCoefmat(x$param.table, digits=digits, na.print="NA",...)
+ print(x$param.table, digits=digits, na.print="NA",...)
}
cat("\n*** Insample Predictions ***\n")
cat("\nUnweighted:\n")
- printCoefmat(x$agg.table, digits=digits, na.print="NA",...)
+ print(x$agg.table, digits=digits, na.print="NA",...)
if (!is.null(x$agg.wtable)) {
cat("\nWeighted:\n")
- printCoefmat(x$agg.wtable, digits=digits, na.print="NA",...)
+ print(x$agg.wtable, digits=digits, na.print="NA",...)
}
cat("\nNumber of Units:", x$n.obs)
@@ -24,9 +24,9 @@ print.summary.eco <- function(x, digits=max(3, getOption("digits")-3), ...) {
if (!is.null(x$W1.table)) {
cat("\n\nUnit-level Estimates of W1:\n")
- printCoefmat(x$W1.table, digits=digits, na.print="NA",...)
+ print(x$W1.table, digits=digits, na.print="NA",...)
cat("\n\nUnit-level Estimates of W2:\n")
- printCoefmat(x$W2.table, digits=digits, na.print="NA",...)
+ print(x$W2.table, digits=digits, na.print="NA",...)
}
cat("\n")
diff --git a/R/print.summary.ecoML.R b/R/print.summary.ecoML.R
index 916b930..c1ba4ae 100644
--- a/R/print.summary.ecoML.R
+++ b/R/print.summary.ecoML.R
@@ -9,20 +9,20 @@ print.summary.ecoML <- function(x, digits=max(3,
cat("\nOriginal Model Parameters (rho is fixed at ", x$rho, "):\n", sep="")
else
cat("\nOriginal Model Parameters:\n")
- printCoefmat(x$param.table, digits=digits, na.print="NA",...)
+ print(x$param.table, digits=digits, na.print="NA",...)
}
cat("\n*** Insample Predictions ***\n")
cat("\nUnweighted:\n")
- printCoefmat(x$agg.table, digits=digits, na.print="NA",...)
+ print(x$agg.table, digits=digits, na.print="NA",...)
if (!is.null(x$agg.wtable)) {
cat("\nWeighted:\n")
- printCoefmat(x$agg.wtable, digits=digits, na.print="NA",...)
+ print(x$agg.wtable, digits=digits, na.print="NA",...)
}
if (!is.null(x$W.table)) {
cat("\n\nUnit-level Estimates of W:\n")
- printCoefmat(x$W.table, digits=digits, na.print="NA",...)
+ print(x$W.table, digits=digits, na.print="NA",...)
}
cat("\n\nLog-likelihood:", x$loglik)
diff --git a/R/print.summary.ecoNP.R b/R/print.summary.ecoNP.R
index 6a3cc67..4adfeea 100644
--- a/R/print.summary.ecoNP.R
+++ b/R/print.summary.ecoNP.R
@@ -5,11 +5,11 @@ print.summary.ecoNP <- function(x, digits=max(3, getOption("digits")-3), ...)
cat("\n\nIn-sample Predictions:\n")
cat("\nUnweighted:\n")
- printCoefmat(x$agg.table, digits=digits, na.print="NA",...)
+ print(x$agg.table, digits=digits, na.print="NA",...)
if (!is.null(x$agg.wtable)) {
cat("\nWeighted:\n")
- printCoefmat(x$agg.wtable, digits=digits, na.print="NA",...)
+ print(x$agg.wtable, digits=digits, na.print="NA",...)
}
cat("\nNumber of Units:", x$n.obs)
@@ -17,22 +17,22 @@ print.summary.ecoNP <- function(x, digits=max(3, getOption("digits")-3), ...)
if (!is.null(x$param.table)) {
tt <- x$param.table
cat("\nParameter Estimates of mu1:\n")
- printCoefmat(tt$mu1.table, digits=digits, na.print="NA",...)
+ print(tt$mu1.table, digits=digits, na.print="NA",...)
cat("\nParameter Estimates of mu2:\n")
- printCoefmat(tt$mu2.table, digits=digits, na.print="NA",...)
+ print(tt$mu2.table, digits=digits, na.print="NA",...)
cat("\nParameter Estimates of Sigma11:\n")
- printCoefmat(tt$Sigma11.table, digits=digits, na.print="NA",...)
+ print(tt$Sigma11.table, digits=digits, na.print="NA",...)
cat("\nParameter Estimates of Sigma12:\n")
- printCoefmat(tt$Sigma12.table, digits=digits, na.print="NA",...)
+ print(tt$Sigma12.table, digits=digits, na.print="NA",...)
cat("\nParameter Estimates of Sigma22:\n")
- printCoefmat(tt$Sigma22.table, digits=digits, na.print="NA",...)
+ print(tt$Sigma22.table, digits=digits, na.print="NA",...)
}
if (!is.null(x$W1.table)) {
cat("\n\nUnit-level Estimates of W1:\n")
- printCoefmat(x$W1.table, digits=digits, na.print="NA",...)
+ print(x$W1.table, digits=digits, na.print="NA",...)
cat("\n\nUnit-level Estimates of W2:\n")
- printCoefmat(x$W2.table, digits=digits, na.print="NA",...)
+ print(x$W2.table, digits=digits, na.print="NA",...)
}
cat("\n")
diff --git a/R/print.summary.predict.eco.R b/R/print.summary.predict.eco.R
index 5a546c5..7bc7826 100644
--- a/R/print.summary.predict.eco.R
+++ b/R/print.summary.predict.eco.R
@@ -1,7 +1,7 @@
print.summary.predict.eco <- function(x, digits=max(3, getOption("digits")
-3), ...) {
cat("\nOut-of-sample Prediction:\n")
- printCoefmat(x$W.table, digits=digits, na.print="NA",...)
+ print(x$W.table, digits=digits, na.print="NA",...)
cat("\nNumber of Monte Carlo Draws:", x$n.draws)
diff --git a/man/eco.Rd b/man/eco.Rd
index 4a0650a..f28ec6e 100644
--- a/man/eco.Rd
+++ b/man/eco.Rd
@@ -10,10 +10,9 @@
Normal/Inverse-Wishart prior) for ecological inference in \eqn{2
\times 2} tables via Markov chain Monte Carlo. It gives the in-sample
predictions as well as the estimates of the model parameters. The
- model and algorithm are described in Imai, Lu and Strauss (2006). The
- contextual effect can also be modeled by following the strategy
- described in Imai, Lu, and Strauss (2006).
-}
+ model and algorithm are described in Imai, Lu and Strauss (Forthcoming
+ a and b).
+ }
\usage{
eco(formula, data = parent.frame(), N = NULL, supplement = NULL,
@@ -26,8 +25,9 @@ eco(formula, data = parent.frame(), N = NULL, supplement = NULL,
\arguments{
\item{formula}{A symbolic description of the model to be fit,
specifying the column and row margins of \eqn{2 \times
- 2} ecological tables. \code{Y ~ X} specifies \code{Y} as the
- column margin and \code{X} as the row margin. Details and specific
+ 2} ecological tables. \code{Y ~ X} specifies \code{Y} as the
+ column margin (e.g., turnout) and \code{X} as the row margin
+ (e.g., percent African-American). Details and specific
examples are given below.
}
\item{data}{An optional data frame in which to interpret the variables
@@ -43,27 +43,31 @@ eco(formula, data = parent.frame(), N = NULL, supplement = NULL,
model. The default is \code{NULL}.
}
\item{context}{Logical. If \code{TRUE}, the contextual effect is also
- modeled. See Imai and Lu (2005) for details. The default is
- \code{FALSE}.
+ modeled, that is to assume the row margin \eqn{X} and the unknown
+ \eqn{W_1} and \eqn{W_2} are correlated. See Imai, Lu and Strauss
+ (Forthcoming b) for details. The default is \code{FALSE}.
}
\item{mu0}{A scalar or a numeric vector that specifies the prior mean
- for the mean parameter \eqn{\mu}. If it is a scalar, then its value
- will be repeated to yield a vector of the length of \eqn{\mu}, otherwise,
- it needs to be a vector of same length as \eqn{\mu}.
- When \code{context=TRUE}, the length of \eqn{\mu} is 3,
- otherwise it is 2. The default is \code{0}.
- }
- \item{tau0}{A positive integer representing the prior scale
- for the mean parameter \eqn{\mu}. The default is \code{2}.
+ for the mean parameter \eqn{\mu} for \eqn{(W_1,W_2)} (or for
+ \eqn{(W_1, W_2, X)} if \code{context=TRUE}). When the input of
+ \code{mu0} is a scalar, its value will be repeated to yield a vector of
+ the length of \eqn{\mu}, otherwise, it needs to be a vector of same
+ length as \eqn{\mu}. When \code{context=TRUE}, the length of \eqn{\mu}
+ is 3, otherwise it is 2. The default is \code{0}.
}
+ \item{tau0}{A positive integer representing the scale parameter of the
+ Normal-Inverse Wishart prior for the mean and variance parameter
+ \eqn{(\mu, \Sigma)}. The default is \code{2}.}
\item{nu0}{A positive integer representing the prior degrees of
- freedom of the variance matrix \eqn{\Sigma}. the default is \code{4}.
+ freedom of the Normal-Inverse Wishart prior for the mean and
+ variance parameter \eqn{(\mu, \Sigma)}. The default is \code{4}.
}
\item{S0}{A positive scalar or a positive definite matrix that specifies
- the prior scale matrix for the variance matrix \eqn{\Sigma}. If it is
+ the prior scale matrix of the Normal-Inverse Wishart prior for the
+ mean and variance parameter \eqn{(\mu, \Sigma)} . If it is
a scalar, then the prior scale matrix will be a diagonal matrix with
- the same dimensions as \eqn{\Sigma} and the diagonal elements all take value
- of \code{S0}, otherwise \code{S0} needs to have same dimensions as
+ the same dimensions as \eqn{\Sigma} and the diagonal elements all take
+ value of \code{S0}, otherwise \code{S0} needs to have same dimensions as
\eqn{\Sigma}. When \code{context=TRUE}, \eqn{\Sigma} is a
\eqn{3 \times 3} matrix, otherwise, it is \eqn{2 \times 2}.
The default is \code{10}.
@@ -96,6 +100,7 @@ eco(formula, data = parent.frame(), N = NULL, supplement = NULL,
algorithm is used where candidate draws are sampled from the uniform
distribution on the tomography line for each unit. Note that the
grid method is significantly slower than the Metropolis algorithm.
+ The default is \code{FALSE}.
}
\item{n.draws}{A positive integer. The number of MCMC draws.
The default is \code{5000}.
@@ -116,17 +121,20 @@ eco(formula, data = parent.frame(), N = NULL, supplement = NULL,
\details{
An example of \eqn{2 \times 2} ecological table for racial voting is
given below:
- \tabular{lccc}{
- \tab black voters \tab white voters \tab \cr
- Voted \tab \eqn{W_{1i}} \tab \eqn{W_{2i}} \tab \eqn{Y_i} \cr
- Not voted \tab \eqn{1-W_{1i}} \tab \eqn{1-W_{2i}} \tab \eqn{1-Y_i} \cr
- \tab \eqn{X_i} \tab \eqn{1-X_i} \tab
+ \tabular{llccc}{
+ \tab \tab black voters \tab white voters \tab \cr
+ \tab vote \tab \eqn{W_{1i}} \tab \eqn{W_{2i}} \tab \eqn{Y_i} \cr
+ \tab not vote \tab \eqn{1-W_{1i}} \tab \eqn{1-W_{2i}} \tab \eqn{1-Y_i} \cr
+ \tab \tab \eqn{X_i} \tab \eqn{1-X_i} \tab
}
where \eqn{Y_i} and \eqn{X_i} represent the observed margins, and
- \eqn{W_1} and \eqn{W_2} are unknown variables. All variables are
- proportions and hence bounded between 0 and 1. For each \eqn{i}, the
- following deterministic relationship holds,
- \eqn{Y_i=X_i W_{1i}+(1-X_i)W_{2i}}.
+ \eqn{W_1} and \eqn{W_2} are unknown variables. In this example,
+ \eqn{Y_i} is the turnout rate in the ith precinct, \eqn{X_i} is the
+ proportion of African Americans in the ith precinct. The unknowns
+ \eqn{W_{1i}} and \eqn{W_{2i}} are the black and white turnout,
+ respectively. All variables are proportions and hence bounded
+ between 0 and 1. For each \eqn{i}, the following deterministic
+ relationship holds, \eqn{Y_i=X_i W_{1i}+(1-X_i)W_{2i}}.
}
\examples{
@@ -135,31 +143,32 @@ eco(formula, data = parent.frame(), N = NULL, supplement = NULL,
data(reg)
## NOTE: convergence has not been properly assessed for the following
-## examples. See Imai, Lu and Strauss (2006) for more complete analyses.
+## examples. See Imai, Lu and Strauss (Forthcoming a and b) for more
+## complete analyses.
## fit the parametric model with the default prior specification
-\dontrun{res <- eco(Y ~ X, data = reg, verbose = TRUE)}
+res <- eco(Y ~ X, data = reg, verbose = TRUE)
## summarize the results
-\dontrun{summary(res)}
+summary(res)
## obtain out-of-sample prediction
-\dontrun{out <- predict(res, verbose = TRUE)}
+out <- predict(res, verbose = TRUE)
## summarize the results
-\dontrun{summary(out)}
+summary(out)
## load the Robinson's census data
data(census)
## fit the parametric model with contextual effects and N
## using the default prior specification
-\dontrun{res1 <- eco(Y ~ X, N = N, context = TRUE, data = census, verbose = TRUE)}
+res1 <- eco(Y ~ X, N = N, context = TRUE, data = census, verbose = TRUE)
## summarize the results
-\dontrun{summary(res1)}
+summary(res1)
## obtain out-of-sample prediction
-\dontrun{out1 <- predict(res1, verbose = TRUE)}
+out1 <- predict(res1, verbose = TRUE)
## summarize the results
-\dontrun{summary(out1)}
+summary(out1)
}
@@ -197,9 +206,14 @@ data(census)
}
\references{
- Imai, Kosuke, Ying Lu and Aaron Strauss. (2006) \dQuote{Bayesian and
+ Imai, Kosuke, Ying Lu and Aaron Strauss. (Forthcoming).
+ \dQuote{eco: R Package for Ecological Inference in 2x2 Tables}
+ Journal of Statistical Software, available at
+ \url{http://imai.princeton.edu/research/eco.html}
+
+ Imai, Kosuke, Ying Lu and Aaron Strauss. (Forthcoming). \dQuote{Bayesian and
Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
- Approach} Technical Report, Princeton University, available at
+ Approach} Political Analysis, available at
\url{http://imai.princeton.edu/research/eiall.html}
}
diff --git a/man/ecoBD.Rd b/man/ecoBD.Rd
index 4321ec7..458f334 100644
--- a/man/ecoBD.Rd
+++ b/man/ecoBD.Rd
@@ -37,8 +37,12 @@ ecoBD(formula, data = parent.frame(), N = NULL)
For example, \code{Y ~ X} specifies \code{Y} as the first column
margin and \code{X} as the first row margin in \eqn{2 \times 2} tables.
If counts are used, \code{formula} may omit the last row and/or column
- margin of the table only if \code{N} is supplied. For larger tables,
- one can use \code{cbind()} and \code{+}. For example, \code{cbind(Y1,
+ margin of the table only if \code{N} is supplied. In this example, the
+ columns will be labeled as \code{X} and \code{not X}, and the rows
+ will be labeled as \code{Y} and \code{not Y}.
+
+ For larger tables, one can use \code{cbind()} and \code{+}. For
+ example, \code{cbind(Y1,
Y2, Y3) ~ X1 + X2 + X3 + X4)} specifies \eqn{3 \times 4} tables.
An \eqn{R \times C} ecological table in the form of counts:
@@ -83,9 +87,9 @@ ecoBD(formula, data = parent.frame(), N = NULL)
data(reg)
## calculate the bounds
-\dontrun{res <- ecoBD(Y ~ X, N = N, data = reg)}
+res <- ecoBD(Y ~ X, N = N, data = reg)
## print the results
-\dontrun{print(res)}
+print(res)
}
\value{
@@ -112,15 +116,23 @@ data(reg)
The object can be printed through \code{print.ecoBD}.
}
+\references{
+ Imai, Kosuke, Ying Lu and Aaron Strauss. (Forthcoming)
+ \dQuote{eco: R Package for Ecological Inference in 2x2 Tables}
+ Journal of Statistical Software, available at
+ \url{http://imai.princeton.edu/research/eco.html}
+
+ Imai, Kosuke, Ying Lu and Aaron Strauss. (Forthcoming)
+ \dQuote{Bayesian and Likelihood Inference for 2 x 2 Ecological Tables:
+ An Incomplete Data Approach} Political Analysis, available at
+ \url{http://imai.princeton.edu/research/eiall.html}
+}
+
\author{
Kosuke Imai, Department of Politics, Princeton University
- \email{kimai at Princeton.Edu}, \url{http://www.princeton.edu/~kimai};
+ \email{kimai at Princeton.Edu}, \url{http://imai.princeton.edu/};
Ying Lu, Institute for Quantitative Social Sciences,
- Harvard University \email{ylu at Latte.Harvard.Edu}}
-
-\references{
- Imai, Kosuke. (2005) \dQuote{Ecological Inference in \eqn{R \times C}
- Tables} Working Paper, Princeton University.
+ Harvard University \email{ylu at Latte.Harvard.Edu}
}
\seealso{\code{eco}, \code{ecoNP}}
diff --git a/man/ecoML.Rd b/man/ecoML.Rd
index adb7f7d..f4c2b10 100644
--- a/man/ecoML.Rd
+++ b/man/ecoML.Rd
@@ -8,28 +8,31 @@
\description{
\code{ecoML} is used to fit parametric models for ecological
inference in \eqn{2 \times 2} tables via Expectation Maximization (EM)
- algorithms. It gives the point estimates of the parameters for models
+ algorithms. The data is specified in proportions. At its most basic setting, the algorithm
+ assumes that the individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2}) are distributed bivariate normally (after logit
+ transformations). The function calculates point estimates of the parameters for models
based on different assumptions. The standard errors of the point
estimates are also computed via Supplemented EM algorithms. Moreover,
\code{ecoML} quantifies the amount of missing information associated
with each parameter and allows researcher to examine the impact of
missing information on parameter estimation in ecological
inference. The models and algorithms are described in Imai,
- Lu and Strauss(2006).
+ Lu and Strauss (Forthcoming).
}
\usage{
ecoML(formula, data = parent.frame(), N = NULL, supplement = NULL,
theta.start = c(0,0,1,1,0), fix.rho = FALSE,
context = FALSE, sem = TRUE, epsilon = 10^(-10),
- maxit = 1000, loglik = TRUE, hyptest = FALSE, verbose = TRUE)
+ maxit = 1000, loglik = TRUE, hyptest = FALSE, verbose = FALSE)
}
\arguments{
\item{formula}{A symbolic description of the model to be fit,
specifying the column and row margins of \eqn{2 \times
- 2} ecological tables. \code{Y ~ X} specifies \code{Y} as the
- column margin and \code{X} as the row margin. Details and specific
+ 2} ecological tables. \code{Y ~ X} specifies \code{Y} as the
+ column margin (e.g., turnout) and \code{X} (e.g., percent
+ African-American) as the row margin. Details and specific
examples are given below.
}
\item{data}{An optional data frame in which to interpret the variables
@@ -52,11 +55,13 @@
Imai, Lu and Strauss(2006). The default is \code{FALSE}.
}
\item{context}{Logical. If \code{TRUE}, the contextual effect is also
- modeled. See Imai, Lu and Strauss (2006) for details. The default is
- \code{FALSE}.
+ modeled. In this case, the row margin (i.e., X) and the individual-level rates
+ (i.e., \eqn{W_1} and \eqn{W_2}) are assumed to be distributed tri-variate normally
+ (after logit transformations). See Imai, Lu and Strauss (2006) for
+ details. The default is \code{FALSE}.
}
\item{sem}{Logical. If \code{TRUE}, the standard errors of parameter
- estimates are estimated via SEM algorithm. The default is
+ estimates are estimated via SEM algorithm, as well as the fraction of missing data. The default is
\code{TRUE}.
}
\item{theta.start}{A numeric vector that specifies the starting values
@@ -85,7 +90,7 @@
\code{TRUE}.
}
\item{hyptest}{Logical. If \code{TRUE}, model is estimated under the null
- hypothesis that mean of \eqn{W1} and \eqn{W2} is the same.
+ hypothesis that means of \eqn{W1} and \eqn{W2} are the same.
The default is \code{FALSE}.
}
\item{verbose}{Logical. If \code{TRUE}, the progress of the EM and SEM
@@ -129,7 +134,7 @@ data(census)
data(census)
## fit the parametric model with the default model specifications
-\dontrun{res <- ecoML(Y ~ X, data = census[1:100,],epsilon=10^(-6), verbose = TRUE)}
+\dontrun{res <- ecoML(Y ~ X, data = census[1:100,],N=census[1:100,3],epsilon=10^(-6), verbose = TRUE)}
## summarize the results
\dontrun{summary(res)}
@@ -178,10 +183,14 @@ surv <- 1:600
achieved.}
\item{loglik.log.em}{A vector saving the value of the log-likelihood
function at each iteration of the EM algorithm.}
- \item{mu.log.em}{A matrix saving the mean estimation at each
- iteration of EM.}
- \item{Sigma.log.em}{A matrix saving the variance-covariance estimation
- at each iteration of EM.}
+ \item{mu.log.em}{A matrix saving the unweighted mean estimation of the logit-transformed
+ individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2}) at each iteration of the EM process.}
+ \item{Sigma.log.em}{A matrix saving the log of the variance estimation of the logit-transformed
+ individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2}) at each iteration of EM process.
+ Note, non-transformed variances are displayed on the screen (when \code{verbose = TRUE}).}
+ \item{rho.fisher.em}{A matrix saving the fisher transformation of the estimation of the correlations between
+ the logit-transformed individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2}) at each iteration of EM process.
+ Note, non-transformed correlations are displayed on the screen (when \code{verbose = TRUE}).}
Moreover, when \code{sem=TRUE}, \code{ecoML} also output the following
values:
\item{DM}{The matrix characterizing the rates of convergence of the EM
@@ -209,7 +218,6 @@ surv <- 1:600
\item{VFmis}{The proportion of increased variance associated with each
parameter estimation due to observed data. }
\item{Ieigen}{The largest eigen value of \code{Imiss}.}
- \item{DM}{The rate-of-convergence matrix of the SEM algorithm.}
\item{Icom.trans}{The complete data information matrix for the fisher
transformed parameters.}
\item{Iobs.trans}{The observed data information matrix for the fisher
@@ -228,10 +236,15 @@ surv <- 1:600
}
\references{
- Imai, Kosuke, Ying Lu and Aaron Strauss. (2006) \dQuote{Bayesian and
- Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
- Approach} Technical Report, Princeton University, available at
- \url{http://imai.princeton.edu/research/eiall.html}
+ Imai, Kosuke, Ying Lu and Aaron Strauss. (Forthcoming).
+ \dQuote{eco: R Package for Ecological Inference in 2x2 Tables}
+ Journal of Statistical Software, available at
+ \url{http://imai.princeton.edu/research/eco.html}
+
+ Imai, Kosuke, Ying Lu and Aaron Strauss. (Forthcoming).
+ \dQuote{Bayesian and Likelihood Inference for 2 x 2 Ecological Tables:
+ An Incomplete Data Approach} Political Analysis, available at
+ \url{http://imai.princeton.edu/research/eiall.html}
}
\seealso{\code{eco}, \code{ecoNP}, \code{summary.ecoML}}
diff --git a/man/ecoNP.Rd b/man/ecoNP.Rd
index 9fe3680..0a2ed99 100644
--- a/man/ecoNP.Rd
+++ b/man/ecoNP.Rd
@@ -11,7 +11,7 @@
2} tables via Markov chain Monte Carlo. It gives the in-sample
predictions as well as out-of-sample predictions for population
inference. The models and algorithms are described in Imai, Lu and
- Strauss (2006).
+ Strauss (Forthcoming a and b).
}
\usage{
@@ -25,9 +25,10 @@ ecoNP(formula, data = parent.frame(), N = NULL, supplement = NULL,
\arguments{
\item{formula}{A symbolic description of the model to be fit,
specifying the column and row margins of \eqn{2 \times
- 2} ecological tables. \code{Y ~ X} specifies \code{Y} as the
- column margin and \code{X} as the row margin. Details and specific
- examples are given below.
+ 2} ecological tables. \code{Y ~ X} specifies \code{Y} as the
+ column margin (e.g., turnout) and \code{X} as the row margin
+ (e.g., percent African-American). Details and specific examples
+ are given below.
}
\item{data}{An optional data frame in which to interpret the variables
in \code{formula}. The default is the environment in which
@@ -42,28 +43,33 @@ ecoNP(formula, data = parent.frame(), N = NULL, supplement = NULL,
model. The default is \code{NULL}.
}
\item{context}{Logical. If \code{TRUE}, the contextual effect is also
- modeled. See Imai and Lu (2005) for details. The default is
- \code{FALSE}.
+ modeled, that is to assume the row margin \eqn{X} and the unknown
+   \eqn{W_1} and \eqn{W_2} are correlated. See Imai, Lu and Strauss
+ (Forthcoming b) for details. The default is \code{FALSE}.
}
\item{mu0}{A scalar or a numeric vector that specifies the prior mean
- for the mean parameter \eqn{\mu}. If it is a scalar, then its value
- will be repeated to yield a vector of the length of \eqn{\mu}, otherwise,
+ for the mean parameter \eqn{\mu} of the base prior distribution \eqn{G_0}
+    (see Imai, Lu and Strauss (Forthcoming a and b) for detailed
+    descriptions of Dirichlet prior and the normal base prior distribution).
+ If it is a scalar, then its value will be repeated to yield a vector
+ of the length of \eqn{\mu}, otherwise,
it needs to be a vector of same length as \eqn{\mu}.
When \code{context=TRUE }, the length of \eqn{\mu} is 3,
otherwise it is 2. The default is \code{0}.
}
- \item{tau0}{A positive integer representing the prior scale
- for the mean parameter \eqn{\mu}. The default is \code{2}.
- }
+ \item{tau0}{A positive integer representing the scale parameter of the
+ Normal-Inverse Wishart prior for the mean and variance parameter
+ \eqn{(\mu_i, \Sigma_i)} of each observation. The default is \code{2}.}
+
\item{nu0}{A positive integer representing the prior degrees of
- freedom of the variance matrix \eqn{\Sigma}. the default is \code{4}.
+    freedom of the variance matrix \eqn{\Sigma_i}. The default is \code{4}.
}
\item{S0}{A positive scalar or a positive definite matrix that specifies
- the prior scale matrix for the variance matrix \eqn{\Sigma}. If it is
+ the prior scale matrix for the variance matrix \eqn{\Sigma_i}. If it is
a scalar, then the prior scale matrix will be a diagonal matrix with
- the same dimensions as \eqn{\Sigma} and the diagonal elements all take value
- of \code{S0}, otherwise \code{S0} needs to have same dimensions as
- \eqn{\Sigma}. When \code{context=TRUE}, \eqn{\Sigma} is a
+ the same dimensions as \eqn{\Sigma_i} and the diagonal elements all
+ take value of \code{S0}, otherwise \code{S0} needs to have same
+ dimensions as \eqn{\Sigma_i}. When \code{context=TRUE}, \eqn{\Sigma} is a
\eqn{3 \times 3} matrix, otherwise, it is \eqn{2 \times 2}.
The default is \code{10}.
}
@@ -121,11 +127,13 @@ data(reg)
## fit the nonparametric model to give in-sample predictions
## store the parameters to make population inference later
res <- ecoNP(Y ~ X, data = reg, n.draws = 50, param = TRUE, verbose = TRUE)
+
##summarize the results
summary(res)
## obtain out-of-sample prediction
out <- predict(res, verbose = TRUE)
+
## summarize the results
summary(out)
@@ -140,14 +148,16 @@ data(census)
## fit the parametric model with contextual effects and N
## using the default prior specification
-\dontrun{res1 <- ecoNP(Y ~ X, N = N, context = TRUE, param = TRUE, data = census,
- n.draws = 25, verbose = TRUE)}
+
+res1 <- ecoNP(Y ~ X, N = N, context = TRUE, param = TRUE, data = census,
+n.draws = 25, verbose = TRUE)
+
## summarize the results
-\dontrun{summary(res1)}
+summary(res1)
## out-of sample prediction
-\dontrun{pres1 <- predict(res1)}
-\dontrun{summary(pres1)}
+pres1 <- predict(res1)
+summary(pres1)
}
\value{
@@ -191,10 +201,15 @@ data(census)
}
\references{
- Imai, Kosuke, Ying Lu and Aaron Strauss. (2006) \dQuote{Bayesian and
- Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
- Approach} Technical Report, Princeton University, available at
- \url{http://imai.princeton.edu/research/eiall.html}
+ Imai, Kosuke, Ying Lu and Aaron Strauss. (Forthcoming).
+ \dQuote{eco: R Package for Ecological Inference in 2x2 Tables}
+ Journal of Statistical Software, available at
+ \url{http://imai.princeton.edu/research/eco.html}
+
+ Imai, Kosuke, Ying Lu and Aaron Strauss. (Forthcoming).
+ \dQuote{Bayesian and Likelihood Inference for 2 x 2 Ecological Tables:
+ An Incomplete Data Approach} Political Analysis, available at
+ \url{http://imai.princeton.edu/research/eiall.html}
}
\seealso{\code{eco}, \code{ecoML}, \code{predict.eco}, \code{summary.ecoNP}}
diff --git a/man/predict.ecoNP.Rd b/man/predict.ecoNP.Rd
index 0439112..973a9f7 100644
--- a/man/predict.ecoNP.Rd
+++ b/man/predict.ecoNP.Rd
@@ -1,4 +1,4 @@
-\name{predict.eco}
+\name{predict.ecoNP}
\alias{predict.ecoNP}
\alias{predict.ecoNPX}
diff --git a/man/summary.ecoML.Rd b/man/summary.ecoML.Rd
index 8e8bc6d..4770496 100644
--- a/man/summary.ecoML.Rd
+++ b/man/summary.ecoML.Rd
@@ -63,7 +63,8 @@
depends on how many correlation parameters exist in the model. Column
order is the same as the order of the parameters in \code{param.table}.}
\item{param.table}{Final estimates of the parameter values for the model.
- Excludes parameters fixed by the user upon calling \code{ecoML}.}
+ Excludes parameters fixed by the user upon calling \code{ecoML}.
+ See \code{ecoML} documentation for order of parameters.}
\item{agg.table}{Aggregate estimates of the marginal means
of \eqn{W_1} and \eqn{W_2}}
\item{agg.wtable}{Aggregate estimates of the marginal means
@@ -74,7 +75,7 @@
This object can be printed by \code{print.summary.eco}
}
-\seealso{\code{eco}, \code{predict.eco}}
+\seealso{\code{ecoML}}
\author{
Kosuke Imai, Department of Politics, Princeton University,
diff --git a/src/bayes.c b/src/bayes.c
index 9093a71..d21b347 100644
--- a/src/bayes.c
+++ b/src/bayes.c
@@ -37,6 +37,7 @@ void NIWupdate(
double **Sn = doubleMatrix(n_dim, n_dim);
double **mtemp = doubleMatrix(n_dim, n_dim);
+ /*read data */
for (j=0; j<n_dim; j++) {
Ybar[j] = 0;
for (i=0; i<n_samp; i++)
@@ -45,17 +46,20 @@ void NIWupdate(
for (k=0; k<n_dim; k++)
Sn[j][k] = S0[j][k];
}
- for (j=0; j<n_dim; j++) {
- mun[j] = (tau0*mu0[j]+n_samp*Ybar[j])/(tau0+n_samp);
- for (k=0; k<n_dim; k++) {
- Sn[j][k] += (tau0*n_samp)*(Ybar[j]-mu0[j])*(Ybar[k]-mu0[k])/(tau0+n_samp);
- for (i=0; i<n_samp; i++)
- Sn[j][k] += (Y[i][j]-Ybar[j])*(Y[i][k]-Ybar[k]);
- /* conditioning on mu:
- Sn[j][k]+=tau0*(mu[j]-mu0[j])*(mu[k]-mu0[k]);
- Sn[j][k]+=(Y[i][j]-mu[j])*(Y[i][k]-mu[k]); */
+
+ /* posterior updating*/
+
+ for (j=0; j<n_dim; j++)
+ {
+ mun[j] = (tau0*mu0[j]+n_samp*Ybar[j])/(tau0+n_samp);
+ for (k=0; k<n_dim; k++)
+ {
+ Sn[j][k] += (tau0*n_samp)*(Ybar[j]-mu0[j])*(Ybar[k]-mu0[k])/(tau0+n_samp);
+ for (i=0; i<n_samp; i++)
+ Sn[j][k] += (Y[i][j]-Ybar[j])*(Y[i][k]-Ybar[k]);
+ }
}
- }
+
dinv(Sn, n_dim, mtemp);
rWish(InvSigma, mtemp, nu0+n_samp, n_dim);
dinv(InvSigma, n_dim, Sigma);
diff --git a/src/fintegrate.c b/src/fintegrate.c
index 936a69a..11b5ee7 100644
--- a/src/fintegrate.c
+++ b/src/fintegrate.c
@@ -1,7 +1,7 @@
/******************************************************************
This file is a part of eco: R Package for Estimating Fitting
Bayesian Models of Ecological Inference for 2X2 tables
- by Ying Lu and Kosuke Imai
+ by Kosuke Imai, Ying Lu, and Aaron Strauss
Copyright: GPL version 2 or later.
*******************************************************************/
@@ -22,9 +22,11 @@
#include "fintegrate.h"
//#include <gsl/gsl_integration.h>
-//Bivariate normal distribution, with parameterization
-//see: http://mathworld.wolfram.com/BivariateNormalDistribution.html
-//see for param: http://www.math.uconn.edu/~binns/reviewII210.pdf
+/**
+ * Bivariate normal distribution, with parameterization
+ * see: http://mathworld.wolfram.com/BivariateNormalDistribution.html
+ * see for param: http://www.math.uconn.edu/~binns/reviewII210.pdf
+ */
void NormConstT(double *t, int n, void *param)
{
int ii;
@@ -56,27 +58,26 @@ void NormConstT(double *t, int n, void *param)
dtemp=1/(2*M_PI*sqrt(Sigma[0][0]*Sigma[1][1]*(1-rho*rho)));
- for (ii=0; ii<n; ii++)
- {
- imposs=0; inp=t[ii];
- W1[ii]=getW1starFromT(t[ii],pp,&imposs);
- if (!imposs) W2[ii]=getW2starFromT(t[ii],pp,&imposs);
- if (imposs==1) t[ii]=0;
- else {
- W1p[ii]=getW1starPrimeFromT(t[ii],pp);
- W2p[ii]=getW2starPrimeFromT(t[ii],pp);
- pfact=sqrt(W1p[ii]*W1p[ii]+W2p[ii]*W2p[ii]);
- t[ii]=exp(-1/(2*(1-rho*rho))*
- ((W1[ii]-mu[0])*(W1[ii]-mu[0])/Sigma[0][0]+
- (W2[ii]-mu[1])*(W2[ii]-mu[1])/Sigma[1][1]-
- 2*rho*(W1[ii]-mu[0])*(W2[ii]-mu[1])
- /sqrt(Sigma[0][0]*Sigma[1][1])))*dtemp*pfact;
- //if (pp->setP->weirdness)
- // Rprintf("Normc... %d %d %5g -> %5g %5g => %5g with %5g imposs %d\n", ii, n, inp, W1[ii], W2[ii],t[ii],pfact,imposs);
- //char ch;
- //scanf(" %c", &ch );
- }
+ for (ii=0; ii<n; ii++) {
+ imposs=0; inp=t[ii];
+ W1[ii]=getW1starFromT(t[ii],pp,&imposs);
+ if (!imposs) W2[ii]=getW2starFromT(t[ii],pp,&imposs);
+ if (imposs==1) t[ii]=0;
+ else {
+ W1p[ii]=getW1starPrimeFromT(t[ii],pp);
+ W2p[ii]=getW2starPrimeFromT(t[ii],pp);
+ pfact=sqrt(W1p[ii]*W1p[ii]+W2p[ii]*W2p[ii]);
+ t[ii]=exp(-1/(2*(1-rho*rho))*
+ ((W1[ii]-mu[0])*(W1[ii]-mu[0])/Sigma[0][0]+
+ (W2[ii]-mu[1])*(W2[ii]-mu[1])/Sigma[1][1]-
+ 2*rho*(W1[ii]-mu[0])*(W2[ii]-mu[1])
+ /sqrt(Sigma[0][0]*Sigma[1][1])))*dtemp*pfact;
+ //if (pp->setP->weirdness)
+ // Rprintf("Normc... %d %d %5g -> %5g %5g => %5g with %5g imposs %d\n", ii, n, inp, W1[ii], W2[ii],t[ii],pfact,imposs);
+ //char ch;
+ //scanf(" %c", &ch );
}
+ }
Free(W1);
Free(W1p);
Free(W2);
@@ -85,13 +86,14 @@ void NormConstT(double *t, int n, void *param)
FreeMatrix(Sigma,dim);
}
-/*
+/**
* Integrand for computing sufficient statistic
* Which statistic to estimate depends on param->suff (see macros.h)
*/
void SuffExp(double *t, int n, void *param)
{
- int ii,imposs,suff,i,j;
+ int ii,imposs,i,j;
+ sufficient_stat suff;
Param *pp=(Param *)param;
int dim = (pp->setP->ncar==1) ? 3 : 2;
double *mu=doubleArray(dim);
@@ -109,7 +111,7 @@ void SuffExp(double *t, int n, void *param)
W2p = doubleArray(n);
mu[0]= pp->caseP.mu[0];
mu[1]= pp->caseP.mu[1];
- for(i=0;i<dim;i++)
+ for(i=0;i<dim;i++) {
for(j=0;j<dim;j++) {
if (dim==3) {
Sigma[i][j]=pp->setP->Sigma3[i][j];
@@ -120,62 +122,83 @@ void SuffExp(double *t, int n, void *param)
InvSigma[i][j]=pp->setP->InvSigma[i][j];
}
}
+ }
normc=pp->caseP.normcT;
suff=pp->caseP.suff;
imposs=0;
- for (ii=0; ii<n; ii++)
- {
- imposs=0; inp=t[ii];
- W1[ii]=getW1starFromT(t[ii],pp,&imposs);
- if (!imposs) W2[ii]=getW2starFromT(t[ii],pp,&imposs);
- if (imposs==1) t[ii]=0;
- else {
- W1p[ii]=getW1starPrimeFromT(t[ii],pp);
- W2p[ii]=getW2starPrimeFromT(t[ii],pp);
- pfact=sqrt(W1p[ii]*W1p[ii]+W2p[ii]*W2p[ii]);
- vtemp[0] = W1[ii];
- vtemp[1] = W2[ii];
- density=dBVNtomo(vtemp, pp, 0,normc);
- t[ii] = density*pfact;
- if (suff==0) t[ii]=W1[ii]*t[ii];
- else if (suff==1) t[ii]=W2[ii]*t[ii];
- else if (suff==2) t[ii]=W1[ii]*W1[ii]*t[ii];
- else if (suff==3) t[ii]=W1[ii]*W2[ii]*t[ii];
- else if (suff==4) t[ii]=W2[ii]*W2[ii]*t[ii];
- else if (suff==5) t[ii]=invLogit(W1[ii])*t[ii];
- else if (suff==6) t[ii]=invLogit(W2[ii])*t[ii];
- else if (suff==7) {
- if (dim == 3) {
- //if(pp->setP->verbose>=2 && dim==3) Rprintf("InvSigma loglik: %5g %5g %5g %5g %5g %5g\n",InvSigma[0][0],InvSigma[0][1],InvSigma[1][0],InvSigma[1][1],InvSigma[1][2],InvSigma[2][2]);
- vtemp[2]=logit(pp->caseP.X,"log-liklihood");
- mu[0]=pp->setP->pdTheta[1];
- mu[1]=pp->setP->pdTheta[2];
- mu[2]=pp->setP->pdTheta[0];
- }
- t[ii]=dMVN(vtemp,mu,InvSigma,dim,0)*pfact;
- //t[ii]=dMVN3(vtemp,mu,(double*)(&(InvSigma[0][0])),dim,0)*pfact;
- }
- else if (suff!=-1) Rprintf("Error Suff= %d",suff);
+ for (ii=0; ii<n; ii++) {
+ imposs=0; inp=t[ii];
+ W1[ii]=getW1starFromT(t[ii],pp,&imposs);
+ if (!imposs) W2[ii]=getW2starFromT(t[ii],pp,&imposs);
+ if (imposs==1) t[ii]=0;
+ else {
+ W1p[ii]=getW1starPrimeFromT(t[ii],pp);
+ W2p[ii]=getW2starPrimeFromT(t[ii],pp);
+ pfact=sqrt(W1p[ii]*W1p[ii]+W2p[ii]*W2p[ii]);
+ vtemp[0] = W1[ii];
+ vtemp[1] = W2[ii];
+ density=dBVNtomo(vtemp, pp, 0,normc);
+ t[ii] = density*pfact;
+ if (suff==SS_W1star) t[ii]=W1[ii]*t[ii];
+ else if (suff==SS_W2star) t[ii]=W2[ii]*t[ii];
+ else if (suff==SS_W1star2) t[ii]=W1[ii]*W1[ii]*t[ii];
+ else if (suff==SS_W1W2star) t[ii]=W1[ii]*W2[ii]*t[ii];
+ else if (suff==SS_W2star2) t[ii]=W2[ii]*W2[ii]*t[ii];
+ else if (suff==SS_W1) t[ii]=invLogit(W1[ii])*t[ii];
+ else if (suff==SS_W2) t[ii]=invLogit(W2[ii])*t[ii];
+ else if (suff==SS_Loglik) {
+ if (dim == 3) {
+ //if(pp->setP->verbose>=2 && dim==3) Rprintf("InvSigma loglik: %5g %5g %5g %5g %5g %5g\n",InvSigma[0][0],InvSigma[0][1],InvSigma[1][0],InvSigma[1][1],InvSigma[1][2],InvSigma[2][2]);
+ vtemp[2]=logit(pp->caseP.X,"log-likelihood");
+ mu[0]=pp->setP->pdTheta[1];
+ mu[1]=pp->setP->pdTheta[2];
+ mu[2]=pp->setP->pdTheta[0];
}
+ t[ii]=dMVN(vtemp,mu,InvSigma,dim,0)*pfact;
+ //t[ii]=dMVN3(vtemp,mu,(double*)(&(InvSigma[0][0])),dim,0)*pfact;
+ }
+ else if (suff!=SS_Test) Rprintf("Error Suff= %d",suff);
}
+ }
Free(W1);Free(W1p);Free(W2);Free(W2p);Free(mu);Free(vtemp);
FreeMatrix(Sigma,dim); FreeMatrix(InvSigma,dim);
}
-//Returns the log likelihood of a particular case
+
+/**
+ * Returns the log likelihood of a particular case (i.e, record, datapoint)
+ */
double getLogLikelihood(Param* param) {
- if (param->caseP.dataType==0 && !(param->caseP.Y>=.990 || param->caseP.Y<=.010)) {
- param->caseP.suff=7;
+ if (param->caseP.dataType==DPT_General && !(param->caseP.Y>=.990 || param->caseP.Y<=.010)) {
+ //non-survey data: do integration to find likelihood
+ param->caseP.suff=SS_Loglik;
return log(paramIntegration(&SuffExp,(void*)param));
- }
- else if (param->caseP.dataType==3 || (param->caseP.Y>=.990 || param->caseP.Y<=.010)) {
+
+
+ } else if (param->caseP.dataType==DPT_Homog_X1 || param->caseP.dataType==DPT_Homog_X0) {
+    //Homogeneous data: just do normal likelihood on one dimension
+ double lik,sigma2,val,mu;
+ val = (param->caseP.dataType==DPT_Homog_X1) ? param->caseP.Wstar[0] : param->caseP.Wstar[1];
+ if (!param->setP->ncar) {
+ mu = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[0] : param->setP->pdTheta[1];
+ sigma2 = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[2] : param->setP->pdTheta[3];
+ } else {
+ mu = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[1] : param->setP->pdTheta[2];
+ sigma2 = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[4] : param->setP->pdTheta[5];
+ }
+ lik=(1/(sqrt(2*M_PI*sigma2)))*exp(-(.5/sigma2)*(val - mu)*(val - mu));
+ //return log(lik);
+ return 0; //fix later
+
+ } else if (param->caseP.dataType==DPT_Survey || (param->caseP.Y>=.990 || param->caseP.Y<=.010)) {
+ //Survey data (or v tight bounds): multi-variate normal
int dim=param->setP->ncar ? 3 : 2;
double *mu=doubleArray(dim);
double *vtemp=doubleArray(dim);
double **InvSig=doubleMatrix(dim,dim);/* inverse covariance matrix*/
int i,j;
- for(i=0;i<dim;i++)
+ for(i=0;i<dim;i++) {
for(j=0;j<dim;j++) {
if (dim==3) {
InvSig[i][j]=param->setP->InvSigma3[i][j];
@@ -184,94 +207,127 @@ double getLogLikelihood(Param* param) {
InvSig[i][j]=param->setP->InvSigma[i][j];
}
}
+ }
double loglik;
- vtemp[0] = param->caseP.Wstar[0];
- vtemp[1] = param->caseP.Wstar[1];
- mu[0]= param->caseP.mu[0];
- mu[1]= param->caseP.mu[1];
- if (param->setP->ncar) {
- vtemp[2]=logit(param->caseP.X,"log-likelihood survey");
- mu[0]=param->setP->pdTheta[1];
- mu[1]=param->setP->pdTheta[2];
- mu[2]=param->setP->pdTheta[0];
- loglik=dMVN(vtemp,mu,InvSig,dim,1);
- }
- else {
- loglik=dMVN(vtemp,mu,InvSig,dim,1);
- }
- Free(mu); Free(vtemp); FreeMatrix(InvSig,dim);
- return loglik;
- }
- else {
- Rprintf("Error.\n");
+ vtemp[0] = param->caseP.Wstar[0];
+ vtemp[1] = param->caseP.Wstar[1];
+ mu[0]= param->caseP.mu[0];
+ mu[1]= param->caseP.mu[1];
+ if (param->setP->ncar) {
+ vtemp[2]=logit(param->caseP.X,"log-likelihood survey");
+ mu[0]=param->setP->pdTheta[1];
+ mu[1]=param->setP->pdTheta[2];
+ mu[2]=param->setP->pdTheta[0];
+ loglik=dMVN(vtemp,mu,InvSig,dim,1);
+ }
+ else {
+ loglik=dMVN(vtemp,mu,InvSig,dim,1);
+ }
+ Free(mu); Free(vtemp); FreeMatrix(InvSig,dim);
+ return loglik;
+ }
+ else { //Unknown type
+ Rprintf("Error; unkown type: %d\n",param->caseP.dataType);
return 0;
}
}
-//Finds W2star, given the equation
-//Y=XW1 + (1-X)W2 and the Wistar=logit(Wi)
-//imposs is set to 1 if the equation cannot be satisfied
+/**
+ **********
+ * Line integral helper function
+ **********
+ */
+
+/**
+ * Returns W2star from W1star, given the following equalities
+ * Y=XW1 + (1-X)W2 and the Wi-star=logit(Wi)
+ * mutation: imposs is set to 1 if the equation cannot be satisfied
+ */
double getW2starFromW1star(double X, double Y, double W1star, int* imposs) {
- double W1;
- if (W1star>30) W1=1; //prevent overflow or underflow
- else W1=1/(1+exp(-1*W1star));
- double W2=Y/(1-X)-X*W1/(1-X);
-
- if(W2>=1 || W2<=0) *imposs=1; //impossible pair of values
- else W2=log(W2/(1-W2));
- return W2;
+ double W1;
+ if (W1star>30) W1=1; //prevent overflow or underflow
+ else W1=1/(1+exp(-1*W1star));
+ double W2=Y/(1-X)-X*W1/(1-X);
+
+ if(W2>=1 || W2<=0) *imposs=1; //impossible pair of values
+ else W2=log(W2/(1-W2));
+ return W2;
}
+/**
+ * Returns W1star from W2star, given the following equalities
+ * Y=XW1 + (1-X)W2 and the Wi-star=logit(Wi)
+ * mutation: imposs is set to 1 if the equation cannot be satisfied
+ */
double getW1starFromW2star(double X, double Y, double W2star, int* imposs) {
- double W2;
- if (W2star>30) W2=1; //prevent overflow or underflow
- else W2=1/(1+exp(-1*W2star));
- double W1=(Y-(1-X)*W2)/X;
-
- if(W1>=1 || W1<=0) *imposs=1; //impossible pair of values
- else W1=log(W1/(1-W1));
- return W1;
+ double W2;
+ if (W2star>30) W2=1; //prevent overflow or underflow
+ else W2=1/(1+exp(-1*W2star));
+ double W1=(Y-(1-X)*W2)/X;
+
+ if(W1>=1 || W1<=0) *imposs=1; //impossible pair of values
+ else W1=log(W1/(1-W1));
+ return W1;
}
+/**
+ * Returns W1 from W2, X, and Y given
+ * Y=XW1 + (1-X)W2
+ */
double getW1FromW2(double X, double Y, double W2) {
- return (Y-(1-X)*W2)/X;
+ return (Y-(1-X)*W2)/X;
}
-//W1star(t)
-//W1(t)=(W1_ub - W1_lb)*t + W1_lb
+ /**
+ * W1star(t)
+ * W1(t)=(W1_ub - W1_lb)*t + W1_lb
+ * mutates impossible to true if W1 is non-finite at t
+ */
double getW1starFromT(double t, Param* param, int* imposs) {
- double W1=(param->caseP.Wbounds[0][1] - param->caseP.Wbounds[0][0])*t + param->caseP.Wbounds[0][0];
- if (W1==1 || W1==0) *imposs=1;
- else W1=log(W1/(1-W1));
- return W1;
+ double W1=(param->caseP.Wbounds[0][1] - param->caseP.Wbounds[0][0])*t + param->caseP.Wbounds[0][0];
+ if (W1==1 || W1==0) *imposs=1;
+ else W1=log(W1/(1-W1));
+ return W1;
}
-//W2star(t)
-//W2(t)=(W2_lb - W2_ub)*t + W2_lb
+
+/**
+ * W2star(t)
+ * W2(t)=(W2_lb - W2_ub)*t + W2_lb
+ */
double getW2starFromT(double t, Param* param, int* imposs) {
- double W2=(param->caseP.Wbounds[1][0] - param->caseP.Wbounds[1][1])*t + param->caseP.Wbounds[1][1];
- if (W2==1 || W2==0) *imposs=1;
- else W2=log(W2/(1-W2));
- return W2;
+ double W2=(param->caseP.Wbounds[1][0] - param->caseP.Wbounds[1][1])*t + param->caseP.Wbounds[1][1];
+ if (W2==1 || W2==0) *imposs=1;
+ else W2=log(W2/(1-W2));
+ return W2;
}
-//W1star'(t)
-//see paper for derivation: W1*(t) = (1/W1)*((w1_ub - w1_lb)/(1-W1)
+
+/**
+ * W1star'(t)
+ * see paper for derivation: W1*(t) = (1/W1)*((w1_ub - w1_lb)/(1-W1)
+ */
double getW1starPrimeFromT(double t, Param* param) {
- double m=(param->caseP.Wbounds[0][1] - param->caseP.Wbounds[0][0]);
- double W1=m*t + param->caseP.Wbounds[0][0];
- W1=(1/W1)*(m/(1-W1));
- return W1;
+ double m=(param->caseP.Wbounds[0][1] - param->caseP.Wbounds[0][0]);
+ double W1=m*t + param->caseP.Wbounds[0][0];
+ W1=(1/W1)*(m/(1-W1));
+ return W1;
}
-//W2star'(t)
-//see paper for derivation: W2*(t) = (1/W2)*((w2_lb - w2_ub)/(1-W2)
+
+/**
+ * W2star'(t)
+ * see paper for derivation: W2*(t) = (1/W2)*((w2_lb - w2_ub)/(1-W2)
+ */
double getW2starPrimeFromT(double t, Param* param) {
- double m=(param->caseP.Wbounds[1][0] - param->caseP.Wbounds[1][1]);
- double W2=m*t + param->caseP.Wbounds[1][1];
- W2=(1/W2)*(m/(1-W2));
- return W2;
+ double m=(param->caseP.Wbounds[1][0] - param->caseP.Wbounds[1][1]);
+ double W2=m*t + param->caseP.Wbounds[1][1];
+ W2=(1/W2)*(m/(1-W2));
+ return W2;
}
-//parameterized integration: bounds always from 0,1
+/**
+ * parameterized line integration
+ * lower bound is t=0, upper bound is t=1
+ */
double paramIntegration(integr_fn f, void *ex) {
double epsabs=pow(10,-11), epsrel=pow(10,-11);
double result=9999, anserr=9999;
@@ -281,8 +337,8 @@ double paramIntegration(integr_fn f, void *ex) {
int *iwork=(int *) Calloc(limit, int);
double *work=(double *)Calloc(lenw, double);
double lb=0.00001; double ub=.99999;
- Rdqags(f, ex, &lb, &ub, &epsabs, &epsrel, &result,
- &anserr, &neval, &ier, &limit, &lenw, &last, iwork, work);
+ Rdqags(f, ex, &lb, &ub, &epsabs, &epsrel, &result,
+ &anserr, &neval, &ier, &limit, &lenw, &last, iwork, work);
Free(iwork);
Free(work);
@@ -297,13 +353,15 @@ double paramIntegration(integr_fn f, void *ex) {
}
-/* integrate normalizing constant and set it in param*/
+/**
+ * integrate normalizing constant and set it in param
+ */
void setNormConst(Param* param) {
- param->caseP.normcT=paramIntegration(&NormConstT,(void*)param);
+ param->caseP.normcT=paramIntegration(&NormConstT,(void*)param);
}
-/*
+/**
* Set the bounds on W1 and W2 in their parameter
*/
void setBounds(Param* param) {
diff --git a/src/gibbsBase.c b/src/gibbsBase.c
index 519d4f9..1c71d0b 100644
--- a/src/gibbsBase.c
+++ b/src/gibbsBase.c
@@ -104,27 +104,33 @@ void cBaseeco(
/* get random seed */
GetRNGstate();
+
/* read the priors */
itemp=0;
for(k=0;k<n_dim;k++)
for(j=0;j<n_dim;j++) S0[j][k]=pdS0[itemp++];
+
/* read the data */
itemp = 0;
for (j = 0; j < n_dim; j++)
for (i = 0; i < n_samp; i++)
X[i][j] = pdX[itemp++];
+
/* Initialize W, Wstar for n_samp */
for (i=0; i< n_samp; i++) {
if (X[i][1]!=0 && X[i][1]!=1) {
W[i][0]=runif(minW1[i], maxW1[i]);
W[i][1]=(X[i][1]-X[i][0]*W[i][0])/(1-X[i][0]);
}
+
if (X[i][1]==0)
for (j=0; j<n_dim; j++) W[i][j]=0.0001;
+
if (X[i][1]==1)
for (j=0; j<n_dim; j++) W[i][j]=0.9999;
+
for (j=0; j<n_dim; j++)
Wstar[i][j]=log(W[i][j])-log(1-W[i][j]);
}
@@ -133,26 +139,43 @@ void cBaseeco(
if (*x1==1)
for (i=0; i<x1_samp; i++) {
W[(n_samp+i)][0]=x1_W1[i];
- if (W[(n_samp+i)][0]==0) W[(n_samp+i)][0]=0.0001;
- if (W[(n_samp+i)][0]==1) W[(n_samp+i)][0]=0.9999;
+
+ if (W[(n_samp+i)][0]==0)
+ W[(n_samp+i)][0]=0.0001;
+
+ if (W[(n_samp+i)][0]==1)
+ W[(n_samp+i)][0]=0.9999;
+
Wstar[(n_samp+i)][0]=log(W[(n_samp+i)][0])-log(1-W[(n_samp+i)][0]);
}
+
if (*x0==1)
for (i=0; i<x0_samp; i++) {
W[(n_samp+x1_samp+i)][1]=x0_W2[i];
- if (W[(n_samp+x1_samp+i)][1]==0) W[(n_samp+x1_samp+i)][1]=0.0001;
- if (W[(n_samp+x1_samp+i)][1]==1) W[(n_samp+x1_samp+i)][1]=0.9999;
+
+ if (W[(n_samp+x1_samp+i)][1]==0)
+ W[(n_samp+x1_samp+i)][1]=0.0001;
+
+ if (W[(n_samp+x1_samp+i)][1]==1)
+ W[(n_samp+x1_samp+i)][1]=0.9999;
+
Wstar[(n_samp+x1_samp+i)][1]=log(W[(n_samp+x1_samp+i)][1])-log(1-W[(n_samp+x1_samp+i)][1]);
}
/* read the survey data */
if (*survey==1) {
itemp = 0;
+
for (j=0; j<n_dim; j++)
for (i=0; i<s_samp; i++) {
S_W[i][j]=sur_W[itemp++];
- if (S_W[i][j]==0) S_W[i][j]=0.0001;
- if (S_W[i][j]==1) S_W[i][j]=0.9999;
+
+ if (S_W[i][j]==0)
+ S_W[i][j]=0.0001;
+
+ if (S_W[i][j]==1)
+ S_W[i][j]=0.9999;
+
S_Wstar[i][j]=log(S_W[i][j])-log(1-S_W[i][j]);
W[(n_samp+x1_samp+x0_samp+i)][j]=S_W[i][j];
Wstar[(n_samp+x1_samp+x0_samp+i)][j]=S_Wstar[i][j];
@@ -172,18 +195,24 @@ void cBaseeco(
itemp = 0;
for(j=0;j<n_dim;j++){
mu[j] = mustart[j];
+
for(k=0;k<n_dim;k++)
Sigma[j][k]=Sigmastart[itemp++];
}
dinv(Sigma, n_dim, InvSigma);
+
+
/*** Gibbs sampler! ***/
if (*verbose)
Rprintf("Starting Gibbs Sampler...\n");
+
for(main_loop=0; main_loop<*n_gen; main_loop++){
/** update W, Wstar given mu, Sigma in regular areas **/
+
for (i=0;i<n_samp;i++){
if ( X[i][1]!=0 && X[i][1]!=1 ) {
+
if (*Grid)
rGrid(W[i], W1g[i], W2g[i], n_grid[i], mu, InvSigma, n_dim);
else
@@ -193,6 +222,7 @@ void cBaseeco(
Wstar[i][0]=log(W[i][0])-log(1-W[i][0]);
Wstar[i][1]=log(W[i][1])-log(1-W[i][1]);
}
+
/* update W2 given W1, mu and Sigma in x1 homeogeneous areas */
if (*x1==1)
@@ -220,6 +250,7 @@ void cBaseeco(
/*store Gibbs draw after burn-in and every nth draws */
if (main_loop>=*burn_in){
itempC++;
+
if (itempC==nth){
pdSMu0[itempA]=mu[0];
pdSMu1[itempA]=mu[1];
@@ -227,6 +258,7 @@ void cBaseeco(
pdSSig01[itempA]=Sigma[0][1];
pdSSig11[itempA]=Sigma[1][1];
itempA++;
+
for(i=0; i<(n_samp+x1_samp+x0_samp); i++){
pdSW1[itempS]=W[i][0];
pdSW2[itempS]=W[i][1];
@@ -235,6 +267,8 @@ void cBaseeco(
itempC=0;
}
}
+
+
if (*verbose)
if (itempP == main_loop) {
Rprintf("%3d percent done.\n", progress*10);
diff --git a/src/gibbsDP.c b/src/gibbsDP.c
index 2356322..40e63b0 100644
--- a/src/gibbsDP.c
+++ b/src/gibbsDP.c
@@ -134,92 +134,134 @@ void cDPeco(
/* get random seed */
GetRNGstate();
+
/* read priors under G0*/
itemp=0;
for(k=0;k<n_dim;k++)
for(j=0;j<n_dim;j++) S0[j][k]=pdS0[itemp++];
+
/* read the data set */
itemp = 0;
for (j = 0; j < n_dim; j++)
for (i = 0; i < n_samp; i++) X[i][j] = pdX[itemp++];
+
/*Intialize W, Wsatr for n_samp */
for (i=0; i< n_samp; i++) {
- if (X[i][1]!=0 && X[i][1]!=1) {
- W[i][0]=runif(minW1[i], maxW1[i]);
- W[i][1]=(X[i][1]-X[i][0]*W[i][0])/(1-X[i][0]);
- }
+
+ if (X[i][1]!=0 && X[i][1]!=1)
+ {
+ W[i][0]=runif(minW1[i], maxW1[i]);
+ W[i][1]=(X[i][1]-X[i][0]*W[i][0])/(1-X[i][0]);
+ }
+
if (X[i][1]==0)
for (j=0; j<n_dim; j++) W[i][j]=0.0001;
- if (X[i][1]==1)
+
+ if (X[i][1]==1)
for (j=0; j<n_dim; j++) W[i][j]=0.9999;
+
for (j=0; j<n_dim; j++)
Wstar[i][j]=log(W[i][j])-log(1-W[i][j]);
}
/*read homeogenous areas information */
if (*x1==1)
- for (i=0; i<x1_samp; i++) {
- W[(n_samp+i)][0]=x1_W1[i];
- if (W[(n_samp+i)][0]==0) W[(n_samp+i)][0]=0.0001;
- if (W[(n_samp+i)][0]==1) W[(n_samp+i)][0]=0.9999;
- Wstar[(n_samp+i)][0]=log(W[(n_samp+i)][0])-log(1-W[(n_samp+i)][0]);
- }
+ for (i=0; i<x1_samp; i++)
+ {
+ W[(n_samp+i)][0]=x1_W1[i];
+
+ if (W[(n_samp+i)][0]==0)
+ W[(n_samp+i)][0]=0.0001;
+
+ if (W[(n_samp+i)][0]==1)
+ W[(n_samp+i)][0]=0.9999;
+
+ Wstar[(n_samp+i)][0]=log(W[(n_samp+i)][0])-log(1-W[(n_samp+i)][0]);
+ }
if (*x0==1)
- for (i=0; i<x0_samp; i++) {
- W[(n_samp+x1_samp+i)][1]=x0_W2[i];
- if (W[(n_samp+x1_samp+i)][1]==0) W[(n_samp+x1_samp+i)][1]=0.0001;
- if (W[(n_samp+x1_samp+i)][1]==1) W[(n_samp+x1_samp+i)][1]=0.9999;
- Wstar[(n_samp+x1_samp+i)][1]=log(W[(n_samp+x1_samp+i)][1])-log(1-W[(n_samp+x1_samp+i)][1]);
- }
+ for (i=0; i<x0_samp; i++)
+ {
+ W[(n_samp+x1_samp+i)][1]=x0_W2[i];
- /*read the survey data */
- if (*survey==1) {
- itemp = 0;
- for (j=0; j<n_dim; j++)
- for (i=0; i<s_samp; i++) {
- S_W[i][j]=sur_W[itemp++];
- if (S_W[i][j]==0) S_W[i][j]=0.0001;
- if (S_W[i][j]==1) S_W[i][j]=0.9999;
- S_Wstar[i][j]=log(S_W[i][j])-log(1-S_W[i][j]);
- W[n_samp+x1_samp+x0_samp+i][j]=S_W[i][j];
- Wstar[n_samp+x1_samp+x0_samp+i][j]=S_Wstar[i][j];
+ if (W[(n_samp+x1_samp+i)][1]==0)
+ W[(n_samp+x1_samp+i)][1]=0.0001;
+
+ if (W[(n_samp+x1_samp+i)][1]==1)
+ W[(n_samp+x1_samp+i)][1]=0.9999;
+
+ Wstar[(n_samp+x1_samp+i)][1]=log(W[(n_samp+x1_samp+i)][1])-log(1-W[(n_samp+x1_samp+i)][1]);
}
- }
+
+ /*read the survey data */
+ if (*survey==1)
+ {
+ itemp = 0;
+
+ for (j=0; j<n_dim; j++)
+ for (i=0; i<s_samp; i++)
+ {
+ S_W[i][j]=sur_W[itemp++];
+
+ if (S_W[i][j]==0)
+ S_W[i][j]=0.0001;
+
+ if (S_W[i][j]==1)
+ S_W[i][j]=0.9999;
+
+ S_Wstar[i][j]=log(S_W[i][j])-log(1-S_W[i][j]);
+ W[n_samp+x1_samp+x0_samp+i][j]=S_W[i][j];
+ Wstar[n_samp+x1_samp+x0_samp+i][j]=S_Wstar[i][j];
+ }
+ }
+
/* Calcualte grids */
if (*Grid)
GridPrep(W1g,W2g, X, maxW1, minW1, n_grid, n_samp, n_step);
+
/* parmeters for Bivaraite t-distribution-unchanged in MCMC */
for (j=0;j<n_dim;j++)
for(k=0;k<n_dim;k++)
mtemp[j][k]=S0[j][k]*(1+tau0)/(tau0*(nu0-n_dim+1));
+
dinv(mtemp, n_dim, S_bvt);
/**draw initial values of mu_i, Sigma_i under G0 for all effective sample**/
/*1. Sigma_i under InvWish(nu0, S0^-1) with E(Sigma)=S0/(nu0-3)*/
/* InvSigma_i under Wish(nu0, S0^-1 */
/*2. mu_i|Sigma_i under N(mu0, Sigma_i/tau0) */
+
dinv(S0, n_dim, mtemp);
- for(i=0;i<t_samp;i++){
- /*draw from wish(nu0, S0^-1) */
- rWish(InvSigma[i], mtemp, nu0, n_dim);
- dinv(InvSigma[i], n_dim, Sigma[i]);
- for (j=0;j<n_dim;j++)
- for(k=0;k<n_dim;k++) mtemp1[j][k]=Sigma[i][j][k]/tau0;
- rMVN(mu[i], mu0, mtemp1, n_dim);
- }
+
+ for(i=0;i<t_samp;i++)
+ {
+ /*draw from wish(nu0, S0^-1) */
+ rWish(InvSigma[i], mtemp, nu0, n_dim);
+ dinv(InvSigma[i], n_dim, Sigma[i]);
+
+ for (j=0;j<n_dim;j++)
+ for(k=0;k<n_dim;k++)
+ mtemp1[j][k]=Sigma[i][j][k]/tau0;
+
+ rMVN(mu[i], mu0, mtemp1, n_dim);
+ }
+
/* initialize the cluster membership */
+
nstar=t_samp; /* the # of disticnt values */
+
for(i=0;i<t_samp;i++)
C[i]=i; /*cluster is from 0...n_samp-1 */
+
if (*verbose)
Rprintf("Starting Gibbs Sampler...\n");
+
for(main_loop=0; main_loop<*n_gen; main_loop++){
/**update W, Wstar given mu, Sigma only for the unknown W/Wstar**/
for (i=0;i<n_samp;i++){
@@ -229,24 +271,27 @@ void cDPeco(
else
rMH(W[i], X[i], minW1[i], maxW1[i], mu[i], InvSigma[i], n_dim);
}
+
/*3 compute Wsta_i from W_i*/
Wstar[i][0]=log(W[i][0])-log(1-W[i][0]);
Wstar[i][1]=log(W[i][1])-log(1-W[i][1]);
}
- if (*x1==1)
- for (i=0; i<x1_samp; i++) {
- dtemp=mu[n_samp+i][1]+Sigma[n_samp+i][0][1]/Sigma[n_samp+i][0][0]*(Wstar[n_samp+i][0]-mu[n_samp+i][0]);
- dtemp1=Sigma[n_samp+i][1][1]*(1-Sigma[n_samp+i][0][1]*Sigma[n_samp+i][0][1]/(Sigma[n_samp+i][0][0]*Sigma[n_samp+i][1][1]));
- Wstar[n_samp+i][1]=norm_rand()*sqrt(dtemp1)+dtemp;
- W[n_samp+i][1]=exp(Wstar[n_samp+i][1])/(1+exp(Wstar[n_samp+i][1]));
- }
+ if (*x1==1)
+ for (i=0; i<x1_samp; i++) {
+ dtemp=mu[n_samp+i][1]+Sigma[n_samp+i][0][1]/Sigma[n_samp+i][0][0]*(Wstar[n_samp+i][0]-mu[n_samp+i][0]);
+ dtemp1=Sigma[n_samp+i][1][1]*(1-Sigma[n_samp+i][0][1]*Sigma[n_samp+i][0][1]/(Sigma[n_samp+i][0][0]*Sigma[n_samp+i][1][1]));
+
+ Wstar[n_samp+i][1]=norm_rand()*sqrt(dtemp1)+dtemp;
+ W[n_samp+i][1]=exp(Wstar[n_samp+i][1])/(1+exp(Wstar[n_samp+i][1]));
+ }
/*update W1 given W2, mu_ord and Sigma_ord in x0 homeogeneous areas */
if (*x0==1)
for (i=0; i<x0_samp; i++) {
dtemp=mu[n_samp+x1_samp+i][0]+Sigma[n_samp+x1_samp+i][0][1]/Sigma[n_samp+x1_samp+i][1][1]*(Wstar[n_samp+x1_samp+i][1]-mu[n_samp+x1_samp+i][1]);
dtemp1=Sigma[n_samp+x1_samp+i][0][0]*(1-Sigma[n_samp+x1_samp+i][0][1]*Sigma[n_samp+x1_samp+i][0][1]/(Sigma[n_samp+x1_samp+i][0][0]*Sigma[n_samp+x1_samp+i][1][1]));
+
Wstar[n_samp+i][0]=norm_rand()*sqrt(dtemp1)+dtemp;
W[n_samp+x1_samp+i][0]=exp(Wstar[n_samp+x1_samp+i][0])/(1+exp(Wstar[n_samp+x1_samp+i][0]));
}
@@ -260,30 +305,40 @@ void cDPeco(
q[j]=dMVN(Wstar[i], mu[j], InvSigma[j], n_dim, 0);
else
q[j]=alpha*dMVT(Wstar[i], mu0, S_bvt, nu0-n_dim+1, 2, 0);
+
dtemp+=q[j];
qq[j]=dtemp; /*compute qq, the cumlative of q*/
}
+
/*standardize q and qq */
- for (j=0; j<t_samp; j++) qq[j]/=dtemp;
+ for (j=0; j<t_samp; j++)
+ qq[j]/=dtemp;
/** draw the configuration parameter **/
j=0; dtemp=unif_rand();
- while (dtemp > qq[j]) j++;
+
+ while (dtemp > qq[j])
+ j++;
+
+
/** Dirichlet update Sigma_i, mu_i|Sigma_i **/
/* j=i: posterior update given Wstar[i] */
if (j==i){
onedata[0][0] = Wstar[i][0];
onedata[0][1] = Wstar[i][1];
+
NIWupdate(onedata, mu[i], Sigma[i], InvSigma[i], mu0, tau0,nu0, S0, 1, n_dim);
C[i]=nstar;
nstar++;
}
+
/* j=i': replace with i' obs */
else {
/*1. mu_i=mu_j, Sigma_i=Sigma_j*/
/*2. update C[i]=C[j] */
for(k=0;k<n_dim;k++) {
mu[i][k]=mu[j][k];
+
for(l=0;l<n_dim;l++) {
Sigma[i][k][l]=Sigma[j][k][l];
InvSigma[i][k][l]=InvSigma[j][k][l];
@@ -294,11 +349,16 @@ void cDPeco(
sortC[i]=C[i];
} /* end of i loop*/
+
/** remixing step using effective sample of Wstar**/
- for(i=0;i<t_samp;i++) indexC[i]=i;
+ for(i=0;i<t_samp;i++)
+ indexC[i]=i;
+
R_qsort_int_I(sortC, indexC, 1, t_samp);
- nstar=0; i=0;
+ nstar=0;
+ i=0;
+
while (i<t_samp){
j=sortC[i]; /*saves the first element in a block of same values */
nj=0; /* counter for a block of same values */
@@ -306,20 +366,26 @@ void cDPeco(
/* get data for remixing */
while ((sortC[i]==j) && (i<t_samp)) {
label[nj]=indexC[i];
+
for (k=0; k<n_dim; k++)
Wstarmix[nj][k]=Wstar[label[nj]][k];
+
nj++; i++;
} /* i records the current position in IndexC */
/* nj records the # of obs in Psimix */
+
/** posterior update for mu_mix, Sigma_mix based on Psimix **/
NIWupdate(Wstarmix, mu_mix,Sigma_mix, InvSigma_mix, mu0, tau0, nu0, S0, nj, n_dim);
+
/**update mu, Simgat with mu_mix, Sigmat_mix via label**/
for (j=0;j<nj;j++){
C[label[j]]=nstar; /* updating C vector with no gap */
+
for (k=0; k<n_dim; k++){
mu[label[j]][k]=mu_mix[k];
+
for (l=0;l<n_dim;l++){
Sigma[label[j]][k][l]=Sigma_mix[k][l];
InvSigma[label[j]][k][l]=InvSigma_mix[k][l];
@@ -328,19 +394,24 @@ void cDPeco(
}
nstar++; /*finish update one distinct value*/
} /* nstar is the number of distinct values */
+
+
/** updating alpha **/
if(*pinUpdate) {
dtemp=b0-log(rbeta(alpha+1, (double) t_samp));
dtemp1=(double)(a0+nstar-1)/(t_samp*dtemp);
+
if(unif_rand() < dtemp1)
alpha=rgamma(a0+nstar, 1/dtemp);
else
alpha=rgamma(a0+nstar-1, 1/dtemp);
}
+
/*store Gibbs draws after burn_in */
R_CheckUserInterrupt();
+
if (main_loop>=*burn_in) {
itempC++;
if (itempC==nth){
diff --git a/src/gibbsEM.c b/src/gibbsEM.c
index 83b7de2..50f8098 100644
--- a/src/gibbsEM.c
+++ b/src/gibbsEM.c
@@ -19,7 +19,7 @@ void ecoSEM(double* optTheta, double* pdTheta, Param* params, double Rmat_old[7]
void ecoEStep(Param* params, double* suff);
void ecoMStep(double* Suff, double* pdTheta, Param* params);
void ecoMStepNCAR(double* Suff, double* pdTheta, Param* params);
-void ecoMStepCCAR(double* Suff, double* pdTheta, Param* params);
+void ecoMStepCCAR(double* pdTheta, Param* params);
void MStepHypTest(Param* params, double* pdTheta);
void initTheta(double* pdTheta_in,Param* params, double* pdTheta);
void initNCAR(Param* params, double* pdTheta);
@@ -31,6 +31,13 @@ void transformTheta(double* pdTheta, double* t_pdTheta, int len, setParam* setP)
void untransformTheta(double* t_pdTheta,double* pdTheta, int len, setParam* setP);
void ncarFixedRhoTransform(double* pdTheta);
void ncarFixedRhoUnTransform(double* pdTheta);
+void printColumnHeader(int main_loop, int iteration_max, setParam* setP, int finalTheta);
+
+/**
+ * Main function.
+ * Important mutations (i.e., outputs): pdTheta, Suff, DMmatrix, history
+ * See internal comments for details.
+ */
void cEMeco(
/*data input */
@@ -41,7 +48,7 @@ void cEMeco(
int *pin_samp, /* sample size */
/* loop vairables */
- int *iteration_max, /* number of maximum interations */
+ int *iteration_max, /* number of maximum iterations */
double *convergence, /* abs value limit before stopping */
/*incorporating survey data */
@@ -87,7 +94,8 @@ void cEMeco(
int s_samp = *survey ? *sur_samp : 0; /* sample size of survey data */
int x1_samp = *x1 ? *sampx1 : 0; /* sample size for X=1 */
int x0_samp = *x0 ? *sampx0 : 0; /* sample size for X=0 */
- int t_samp=n_samp+s_samp+x1_samp+x0_samp; /* total sample size*/
+ //int t_samp=n_samp+s_samp+x1_samp+x0_samp; /* total sample size*/
+ int t_samp=n_samp+s_samp; /* total sample size, ignoring homog data*/
int n_dim=2; /* dimensions */
setParam setP;
@@ -146,144 +154,155 @@ void cEMeco(
-/***Begin main loop ***/
-main_loop=1;start=1;
-while (main_loop<=*iteration_max && (start==1 ||
- (setP.sem==0 && !closeEnough(t_pdTheta,t_pdTheta_old,param_len,*convergence)) ||
- (setP.sem==1 && !semDoneCheck((setParam*)&setP)))) {
-//while (main_loop<=*iteration_max && (start==1 || !closeEnough(transformTheta(pdTheta),transformTheta(pdTheta_old),param_len,*convergence))) {
+ /***Begin main loop ***/
+ main_loop=1;start=1;
+ while (main_loop<=*iteration_max && (start==1 ||
+ (setP.sem==0 && !closeEnough(t_pdTheta,t_pdTheta_old,param_len,*convergence)) ||
+ (setP.sem==1 && !semDoneCheck((setParam*)&setP)))) {
+ //while (main_loop<=*iteration_max && (start==1 || !closeEnough(transformTheta(pdTheta),transformTheta(pdTheta_old),param_len,*convergence))) {
- setP.iter=main_loop;
- if (start) {
- initTheta(pdTheta_in,params,pdTheta);
- transformTheta(pdTheta,t_pdTheta,param_len, &setP);
- setHistory(t_pdTheta,0,0,(setParam*)&setP,history_full);
- if (!setP.ncar) {
- for(i=0;i<t_samp;i++) {
- params[i].caseP.mu[0] = pdTheta[0];
- params[i].caseP.mu[1] = pdTheta[1];
+ setP.iter=main_loop;
+ if (start) {
+ initTheta(pdTheta_in,params,pdTheta);
+ transformTheta(pdTheta,t_pdTheta,param_len, &setP);
+ setHistory(t_pdTheta,0,0,(setParam*)&setP,history_full);
+ if (!setP.ncar) {
+ for(i=0;i<t_samp;i++) {
+ params[i].caseP.mu[0] = pdTheta[0];
+ params[i].caseP.mu[1] = pdTheta[1];
+ }
+ setP.Sigma[0][0] = pdTheta[2];
+ setP.Sigma[1][1] = pdTheta[3];
+ setP.Sigma[0][1] = pdTheta[4]*sqrt(pdTheta[2]*pdTheta[3]);
+ setP.Sigma[1][0] = setP.Sigma[0][1];
+ dinv2D((double*)&setP.Sigma[0][0], 2, (double*)&setP.InvSigma[0][0], "Start of main loop");
}
- setP.Sigma[0][0] = pdTheta[2];
- setP.Sigma[1][1] = pdTheta[3];
- setP.Sigma[0][1] = pdTheta[4]*sqrt(pdTheta[2]*pdTheta[3]);
- setP.Sigma[1][0] = setP.Sigma[0][1];
- dinv2D((double*)&setP.Sigma[0][0], 2, (double*)&setP.InvSigma[0][0], "Start of main loop");
- }
- else {
- if (setP.fixedRho) ncarFixedRhoTransform(pdTheta);
- initNCAR(params,pdTheta);
- if (setP.fixedRho) ncarFixedRhoUnTransform(pdTheta);
+ else {
+ if (setP.fixedRho) ncarFixedRhoTransform(pdTheta);
+ initNCAR(params,pdTheta);
+ if (setP.fixedRho) ncarFixedRhoUnTransform(pdTheta);
+ }
+ start=0;
}
- start=0;
- }
- for(i=0;i<param_len;i++) setP.pdTheta[i]=pdTheta[i];
-
- if (setP.verbose>=1) {
- Rprintf("cycle %d/%d:",main_loop,*iteration_max);
- for(i=0;i<param_len;i++)
- if (setP.varParam[i])
- Rprintf(" %.3f",pdTheta[i]);
- if (setP.calcLoglik==1 && main_loop>2)
- Rprintf(" Prev LL: %5g",Suff[setP.suffstat_len]);
- Rprintf("\n");
- }
- //keep the old theta around for comaprison
- for(i=0;i<param_len;i++) pdTheta_old[i]=pdTheta[i];
- transformTheta(pdTheta_old,t_pdTheta_old,param_len,&setP);
+ for(i=0;i<param_len;i++) setP.pdTheta[i]=pdTheta[i];
+ if (setP.verbose>=1) {
+ if ((main_loop - 1) % 15 == 0) printColumnHeader(main_loop,*iteration_max,&setP,0);
- ecoEStep(params, Suff);
- if (!setP.ncar)
- ecoMStep(Suff,pdTheta,params);
- else
- ecoMStepNCAR(Suff,pdTheta,params);
- transformTheta(pdTheta,t_pdTheta,param_len,&setP);
- //char ch;
- //scanf(" %c", &ch );
-
- //if we're in the second run through of SEM
- if (setP.sem==1) {
- ecoSEM(optTheta, pdTheta, params, Rmat_old, Rmat);
- }
- else {
- setHistory(t_pdTheta,(main_loop<=1) ? 0 : Suff[setP.suffstat_len],main_loop,(setParam*)&setP,history_full);
- }
-
-
- if (setP.verbose>=2) {
- Rprintf("theta and suff\n");
- if (param_len>5) {
- Rprintf("%10g%10g%10g%10g%10g%10g%10g%10g%10g\n",pdTheta[0],pdTheta[1],pdTheta[2],pdTheta[3],pdTheta[4],pdTheta[5],pdTheta[6],pdTheta[7],pdTheta[8]);
+ Rprintf("cycle %d/%d:",main_loop,*iteration_max);
+ for(i=0;i<param_len;i++)
+ if (setP.varParam[i]) {
+ if (pdTheta[i]>=0) Rprintf("% 5.3f",pdTheta[i]);
+ else Rprintf(" % 5.2f",pdTheta[i]);
+ }
+ if (setP.calcLoglik==1 && main_loop>2)
+ Rprintf(" Prev LL: %5.2f",Suff[setP.suffstat_len]);
+ Rprintf("\n");
+ }
+ //keep the old theta around for comaprison
+ for(i=0;i<param_len;i++) pdTheta_old[i]=pdTheta[i];
+ transformTheta(pdTheta_old,t_pdTheta_old,param_len,&setP);
+
+
+ ecoEStep(params, Suff);
+ if (!setP.ncar)
+ ecoMStep(Suff,pdTheta,params);
+ else
+ ecoMStepNCAR(Suff,pdTheta,params);
+ transformTheta(pdTheta,t_pdTheta,param_len,&setP);
+ //char ch;
+ //scanf(" %c", &ch );
+
+ //if we're in the second run through of SEM
+ if (setP.sem==1) {
+ ecoSEM(optTheta, pdTheta, params, Rmat_old, Rmat);
}
else {
- Rprintf("%10g%10g%10g%10g%10g (%10g)\n",pdTheta[0],pdTheta[1],pdTheta[2],pdTheta[3],pdTheta[4],pdTheta[4]*sqrt(pdTheta[2]*pdTheta[3]));
+ setHistory(t_pdTheta,(main_loop<=1) ? 0 : Suff[setP.suffstat_len],main_loop,(setParam*)&setP,history_full);
}
- Rprintf("%10g%10g%10g%10g%10g\n",Suff[0],Suff[1],Suff[2],Suff[3],Suff[4]);
- Rprintf("Sig: %10g%10g%10g\n",setP.Sigma[0][0],setP.Sigma[1][1],setP.Sigma[0][1]);
- if (setP.ncar) Rprintf("Sig3: %10g%10g%10g%10g\n",setP.Sigma3[0][0],setP.Sigma3[1][1],setP.Sigma3[2][2]);
- //char x;
- //R_ReadConsole("hit enter\n",(char*)&x,4,0);
- }
- main_loop++;
- R_FlushConsole();
- R_CheckUserInterrupt();
-}
-/***End main loop ***/
-//finish up: record results and loglik
-Param* param;
-Suff[setP.suffstat_len]=0.0;
-for(i=0;i<param_len;i++) setP.pdTheta[i]=pdTheta[i];
-for(i=0;i<t_samp;i++) {
- param=&(params[i]);
- if(i<n_samp) {
- for(j=0;j<2;j++) inSample[i*2+j]=param->caseP.W[j];
- //setBounds(param);
- //setNormConst(param);
+
+ if (setP.verbose>=2) {
+ Rprintf("theta and suff\n");
+ if (param_len>5) {
+ Rprintf("%10g%10g%10g%10g%10g%10g%10g%10g%10g\n",pdTheta[0],pdTheta[1],pdTheta[2],pdTheta[3],pdTheta[4],pdTheta[5],pdTheta[6],pdTheta[7],pdTheta[8]);
+ }
+ else {
+ Rprintf("%10g%10g%10g%10g%10g (%10g)\n",pdTheta[0],pdTheta[1],pdTheta[2],pdTheta[3],pdTheta[4],pdTheta[4]*sqrt(pdTheta[2]*pdTheta[3]));
+ }
+ Rprintf("%10g%10g%10g%10g%10g\n",Suff[0],Suff[1],Suff[2],Suff[3],Suff[4]);
+ Rprintf("Sig: %10g%10g%10g\n",setP.Sigma[0][0],setP.Sigma[1][1],setP.Sigma[0][1]);
+ if (setP.ncar) Rprintf("Sig3: %10g%10g%10g%10g\n",setP.Sigma3[0][0],setP.Sigma3[1][1],setP.Sigma3[2][2]);
+ //char x;
+ //R_ReadConsole("hit enter\n",(char*)&x,4,0);
+ }
+ main_loop++;
+ R_FlushConsole();
+ R_CheckUserInterrupt();
}
- Suff[setP.suffstat_len]+=getLogLikelihood(param);
-}
-if (setP.verbose>=1) {
- Rprintf("Final Theta:");
- for(i=0;i<param_len;i++) Rprintf(" %.3f",pdTheta[i]);
- if (setP.calcLoglik==1 && main_loop>2) {
- Rprintf(" Final LL: %5g",Suff[setP.suffstat_len]);
- history_full[main_loop-1][param_len]=Suff[setP.suffstat_len];
+ /***End main loop ***/
+ //finish up: record results and loglik
+ Param* param;
+ Suff[setP.suffstat_len]=0.0;
+ for(i=0;i<param_len;i++) setP.pdTheta[i]=pdTheta[i];
+ for(i=0;i<t_samp;i++) {
+ param=&(params[i]);
+ if(i<n_samp) {
+ for(j=0;j<2;j++) inSample[i*2+j]=param->caseP.W[j];
+ //setBounds(param);
+ //setNormConst(param);
}
- Rprintf("\n");
+ Suff[setP.suffstat_len]+=getLogLikelihood(param);
}
-//set the DM matrix (only matters for SEM)
-if (setP.sem==1) {
- int DMlen=0;
- for(i=0; i<param_len;i++)
- if(setP.varParam[i]) DMlen++;
- for(i=0;i<DMlen;i++)
- for(j=0;j<DMlen;j++)
- DMmatrix[i*DMlen+j]=Rmat[i][j];
-}
+ if (setP.verbose>=1) {
+ printColumnHeader(main_loop,*iteration_max,&setP,1);
+ Rprintf("Final Theta:");
+ for(i=0;i<param_len;i++) {
+ if (pdTheta[i]>=0) Rprintf("% 5.3f",pdTheta[i]);
+ else Rprintf(" % 5.2f",pdTheta[i]);
+ }
+ if (setP.calcLoglik==1 && main_loop>2) {
+ Rprintf(" Final LL: %5.2f",Suff[setP.suffstat_len]);
+ history_full[main_loop-1][param_len]=Suff[setP.suffstat_len];
+ }
+ Rprintf("\n");
+ }
-*itersUsed=main_loop;
-for(i=0;i<(*itersUsed);i++) {
- for(j=0;j<(param_len+1);j++)
- history[i*(param_len+1)+j]=history_full[i][j];
-}
+ //set the DM matrix (only matters for SEM)
+ if (setP.sem==1) {
+ int DMlen=0;
+ for(i=0; i<param_len;i++)
+ if(setP.varParam[i]) DMlen++;
+ for(i=0;i<DMlen;i++)
+ for(j=0;j<DMlen;j++)
+ DMmatrix[i*DMlen+j]=Rmat[i][j];
+ }
+ *itersUsed=main_loop;
+ for(i=0;i<(*itersUsed);i++) {
+ for(j=0;j<(param_len+1);j++)
+ history[i*(param_len+1)+j]=history_full[i][j];
+ }
-/* write out the random seed */
-PutRNGstate();
-/* Freeing the memory */
-Free(pdTheta_old);
-//FreeMatrix(Rmat_old,5);
-//FreeMatrix(Rmat,5);
-}
+ /* write out the random seed */
+ PutRNGstate();
-//initializes Theta, varParam, and semDone
-//input: pdTheta_in,params
-//mutates: params.setP, pdTheta
-//NCAR theta: mu_3, mu_1, mu_2, sig_3, sig_1, sig_2, r_13, r_23, r_12
+ /* Freeing the memory */
+ Free(pdTheta_old);
+ //FreeMatrix(Rmat_old,5);
+ //FreeMatrix(Rmat,5);
+ }
+
+/**
+ * initializes Theta, varParam, and semDone
+ * input: pdTheta_in,params
+ * mutates: params.setP, pdTheta
+ * CAR theta: mu_1, mu_2, sig_1, sig_2, rho_12
+ * NCAR theta: mu_3, mu_1, mu_2, sig_3, sig_1, sig_2, r_13, r_23, r_12
+ */
void initTheta(double* pdTheta_in,Param* params, double* pdTheta) {
setParam* setP=params[0].setP;
int param_len=setP->param_len;
@@ -335,25 +354,23 @@ void initTheta(double* pdTheta_in,Param* params, double* pdTheta) {
* NCAR: (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) x*W1, (7) X*W2, (8) W1*W2, (9) loglik
**/
-
-
void ecoEStep(Param* params, double* suff) {
-int t_samp,n_samp,s_samp,x1_samp,x0_samp,i,j,temp0,temp1, verbose;
-double loglik,testdens;
-Param* param; setParam* setP; caseParam* caseP;
-setP=params[0].setP;
-verbose=setP->verbose;
+ int t_samp,n_samp,s_samp,x1_samp,x0_samp,i,j,temp0,temp1, verbose;
+ double loglik,testdens;
+ Param* param; setParam* setP; caseParam* caseP;
+ setP=params[0].setP;
+ verbose=setP->verbose;
-t_samp=setP->t_samp;
-n_samp=setP->n_samp;
-x1_samp=setP->x1_samp;
-x0_samp=setP->x0_samp;
-s_samp=setP->s_samp;
+ t_samp=setP->t_samp;
+ n_samp=setP->n_samp;
+ x1_samp=setP->x1_samp;
+ x0_samp=setP->x0_samp;
+ s_samp=setP->s_samp;
double **Wstar=doubleMatrix(t_samp,5); /* pseudo data(transformed)*/
-loglik=0;
-if (verbose>=3 && !setP->sem) Rprintf("E-step start\n");
+ loglik=0;
+ if (verbose>=3 && !setP->sem) Rprintf("E-step start\n");
for (i = 0; i<n_samp; i++) {
param = &(params[i]);
caseP=&(param->caseP);
@@ -385,71 +402,64 @@ if (verbose>=3 && !setP->sem) Rprintf("E-step start\n");
if (j<2)
caseP->Wstar[j]=Wstar[i][j];
}
- caseP->suff=5;
+ caseP->suff=SS_W1;
caseP->W[0]=paramIntegration(&SuffExp,param);
- caseP->suff=6;
+ caseP->suff=SS_W2;
caseP->W[1]=paramIntegration(&SuffExp,param);
- caseP->suff=-1;
+ caseP->suff=SS_Test;
testdens=paramIntegration(&SuffExp,param);
if (setP->calcLoglik==1 && setP->iter>1) loglik+=getLogLikelihood(param);
- //report E0 if norm const is extremely high or low
- //if((caseP->mu[1] > 1.57685) && (caseP->mu[2]<-1.973)) {
-//Rprintf("HIT! %d %5g %5g %5g %5g %5g %5g %5g %5g err:%5g\n", i, caseP->X, caseP->Y, caseP->mu[0], caseP->mu[1], caseP->normcT,Wstar[i][0],Wstar[i][1],Wstar[i][2],fabs(caseP->W[0]-getW1FromW2(caseP->X, caseP->Y,caseP->W[1])));
- //}
- //if (fabs(caseP->normcT)<pow(10,-7) || fabs(caseP->normcT)>pow(10,10)) {
- // Rprintf("E0 %d %5g %5g %5g %5g %5g %5g %5g %5g err:%5g\n", i, caseP->X, caseP->Y, caseP->mu[0], caseP->mu[1], caseP->normcT,Wstar[i][0],Wstar[i][1],Wstar[i][2],fabs(caseP->W[0]-getW1FromW2(caseP->X, caseP->Y,caseP->W[1])));
- //}
- //report error E1 if E[W1],E[W2] is not on the tomography line
- if (fabs(caseP->W[0]-getW1FromW2(caseP->X, caseP->Y,caseP->W[1]))>0.011) {
- Rprintf("E1 %d %5g %5g %5g %5g %5g %5g %5g %5g err:%5g\n", i, caseP->X, caseP->Y, caseP->mu[0], caseP->mu[1], caseP->normcT,Wstar[i][0],Wstar[i][1],Wstar[i][2],fabs(caseP->W[0]-getW1FromW2(caseP->X, caseP->Y,caseP->W[1])));
- char ch;
- scanf("Hit enter to continue %c\n", &ch );
- }
- //report error E2 if Jensen's inequality doesn't hold
- if (Wstar[i][4]<pow(Wstar[i][1],2) || Wstar[i][2]<pow(Wstar[i][0],2))
- Rprintf("E2 %d %5g %5g %5g %5g %5g %5g %5g %5g\n", i, caseP->X, caseP->Y, caseP->normcT, caseP->mu[1],Wstar[i][0],Wstar[i][1],Wstar[i][2],Wstar[i][4]);
- //used for debugging if necessary
- if (verbose>=2 && !setP->sem && ((i<10 && verbose>=3) || (caseP->mu[1] < -1.7 && caseP->mu[0] > 1.4)))
- Rprintf("%d %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n", i, caseP->X, caseP->Y, caseP->mu[0], caseP->mu[1], param->setP->Sigma[0][1], caseP->normcT, caseP->W[0],caseP->W[1],Wstar[i][2]);
+ //report error E1 if E[W1],E[W2] is not on the tomography line
+ if (fabs(caseP->W[0]-getW1FromW2(caseP->X, caseP->Y,caseP->W[1]))>0.011) {
+ Rprintf("E1 %d %5g %5g %5g %5g %5g %5g %5g %5g err:%5g\n", i, caseP->X, caseP->Y, caseP->mu[0], caseP->mu[1], caseP->normcT,Wstar[i][0],Wstar[i][1],Wstar[i][2],fabs(caseP->W[0]-getW1FromW2(caseP->X, caseP->Y,caseP->W[1])));
+ char ch;
+ scanf("Hit enter to continue %c\n", &ch );
+ }
+ //report error E2 if Jensen's inequality doesn't hold
+ if (Wstar[i][4]<pow(Wstar[i][1],2) || Wstar[i][2]<pow(Wstar[i][0],2))
+ Rprintf("E2 %d %5g %5g %5g %5g %5g %5g %5g %5g\n", i, caseP->X, caseP->Y, caseP->normcT, caseP->mu[1],Wstar[i][0],Wstar[i][1],Wstar[i][2],Wstar[i][4]);
+ //used for debugging if necessary
+ if (verbose>=2 && !setP->sem && ((i<10 && verbose>=3) || (caseP->mu[1] < -1.7 && caseP->mu[0] > 1.4)))
+ Rprintf("%d %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n", i, caseP->X, caseP->Y, caseP->mu[0], caseP->mu[1], param->setP->Sigma[0][1], caseP->normcT, caseP->W[0],caseP->W[1],Wstar[i][2]);
}
}
- /* analytically compute E{W2_i|Y_i} given W1_i, mu and Sigma in x1 homeogeneous areas */
- for (i=n_samp; i<n_samp+x1_samp; i++) {
- temp0=params[i].caseP.Wstar[0];
- temp1=params[i].caseP.mu[1]+setP->Sigma[0][1]/setP->Sigma[0][0]*(temp0-params[i].caseP.mu[0]);
- Wstar[i][0]=temp0;
- Wstar[i][1]=temp1;
- Wstar[i][2]=temp0*temp0;
- Wstar[i][3]=temp0*temp1;
- Wstar[i][4]=temp1*temp1;
- }
- /*analytically compute E{W1_i|Y_i} given W2_i, mu and Sigma in x0 homeogeneous areas */
- for (i=n_samp+x1_samp; i<n_samp+x1_samp+x0_samp; i++) {
- temp1=params[i].caseP.Wstar[1];
- temp0=params[i].caseP.mu[0]+setP->Sigma[0][1]/setP->Sigma[1][1]*(temp1-params[i].caseP.mu[1]);
- Wstar[i][0]=temp0;
- Wstar[i][1]=temp1;
- Wstar[i][2]=temp0*temp0;
- Wstar[i][3]=temp0*temp1;
- Wstar[i][4]=temp1*temp1;
- }
+ /* Use the values given by the survey data */
+ //Calculate loglik also
+ for (i=n_samp; i<n_samp+s_samp; i++) {
+ param = &(params[i]);
+ caseP=&(param->caseP);
+ Wstar[i][0]=caseP->Wstar[0];
+ Wstar[i][1]=caseP->Wstar[1];
+ Wstar[i][2]=Wstar[i][0]*Wstar[i][0];
+ Wstar[i][3]=Wstar[i][0]*Wstar[i][1];
+ Wstar[i][4]=Wstar[i][1]*Wstar[i][1];
+ if (setP->calcLoglik==1 && setP->iter>1) loglik+=getLogLikelihood(param);
+ }
- /* Use the values given by the survey data */
- //Calculate loglik also
- for (i=n_samp+x1_samp+x0_samp; i<n_samp+x1_samp+x0_samp+s_samp; i++) {
- param = &(params[i]);
- caseP=&(param->caseP);
- Wstar[i][0]=caseP->Wstar[0];
- Wstar[i][1]=caseP->Wstar[1];
- Wstar[i][2]=Wstar[i][0]*Wstar[i][0];
- Wstar[i][3]=Wstar[i][0]*Wstar[i][1];
- Wstar[i][4]=Wstar[i][1]*Wstar[i][1];
- if (setP->calcLoglik==1 && setP->iter>1) loglik+=getLogLikelihood(param);
- }
+ /* analytically compute E{W2_i|Y_i} given W1_i, mu and Sigma in x1 homeogeneous areas */
+ for (i=n_samp+s_samp; i<n_samp+s_samp+x1_samp; i++) {
+ /*temp0=params[i].caseP.Wstar[0];
+ temp1=params[i].caseP.mu[1]+setP->Sigma[0][1]/setP->Sigma[0][0]*(temp0-params[i].caseP.mu[0]);
+ Wstar[i][0]=temp0;
+ Wstar[i][1]=temp1;
+ Wstar[i][2]=temp0*temp0;
+ Wstar[i][3]=temp0*temp1;
+ Wstar[i][4]=temp1*temp1;*/
+ }
+ /*analytically compute E{W1_i|Y_i} given W2_i, mu and Sigma in x0 homeogeneous areas */
+ for (i=n_samp+s_samp+x1_samp; i<n_samp+s_samp+x1_samp+x0_samp; i++) {
+ /*temp1=params[i].caseP.Wstar[1];
+ temp0=params[i].caseP.mu[0]+setP->Sigma[0][1]/setP->Sigma[1][1]*(temp1-params[i].caseP.mu[1]);
+ Wstar[i][0]=temp0;
+ Wstar[i][1]=temp1;
+ Wstar[i][2]=temp0*temp0;
+ Wstar[i][3]=temp0*temp1;
+ Wstar[i][4]=temp1*temp1;*/
+ }
/*Calculate sufficient statistics */
@@ -484,27 +494,30 @@ if (verbose>=3 && !setP->sem) Rprintf("E-step start\n");
for(j=0; j<setP->suffstat_len; j++)
suff[j]=suff[j]/t_samp;
-//Rprintf("%5g suff0,2,4 %5g %5g %5g\n",setP->pdTheta[6],suff[0],suff[2],suff[4]);
+ //Rprintf("%5g suff0,2,4 %5g %5g %5g\n",setP->pdTheta[6],suff[0],suff[2],suff[4]);
//if(verbose>=1) Rprintf("Log liklihood %15g\n",loglik);
suff[setP->suffstat_len]=loglik;
-FreeMatrix(Wstar,t_samp);
+ FreeMatrix(Wstar,t_samp);
}
-//Standard M-Step
-//input: Suff
-//output or mutated: pdTheta, params
+/**
+ * CAR M-Step
+ * inputs: Suff (sufficient statistics)
+ * CAR Suff: E[W1], E[W2], E[W1^2], E[W2^2], E[W1W2]
+ * mutated (i.e., output): pdTheta, params
+ */
void ecoMStep(double* Suff, double* pdTheta, Param* params) {
-int i;
-setParam* setP=params[0].setP;
+ int i;
+ setParam* setP=params[0].setP;
pdTheta[0]=Suff[0]; /*mu1*/
pdTheta[1]=Suff[1]; /*mu2*/
-if (setP->hypTest>0) {
- MStepHypTest(params,pdTheta);
-}
+ if (setP->hypTest>0) {
+ MStepHypTest(params,pdTheta);
+ }
if (!setP->fixedRho) { //standard
pdTheta[2]=Suff[2]-2*Suff[0]*pdTheta[0]+pdTheta[0]*pdTheta[0]; //sigma11
@@ -518,6 +531,7 @@ if (setP->hypTest>0) {
Imat[0][0]=Suff[2]-2*pdTheta[0]*Suff[0]+pdTheta[0]*pdTheta[0]; //I_11
Imat[1][1]=Suff[3]-2*Suff[1]*pdTheta[1]+pdTheta[1]*pdTheta[1]; //I_22
Imat[0][1]=Suff[4]-Suff[0]*pdTheta[1]-Suff[1]*pdTheta[0]+pdTheta[0]*pdTheta[1]; //I_12
+
pdTheta[2]=(Imat[0][0]-pdTheta[4]*Imat[0][1]*pow(Imat[0][0]/Imat[1][1],0.5))/(1-pdTheta[4]*pdTheta[4]); //sigma11
pdTheta[3]=(Imat[1][1]-pdTheta[4]*Imat[0][1]*pow(Imat[1][1]/Imat[0][0],0.5))/(1-pdTheta[4]*pdTheta[4]); //sigma22
//sigma12 will be determined below by rho
@@ -540,8 +554,12 @@ if (setP->hypTest>0) {
}
-//M-Step under NCAR
-//NCAR: (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) x*W1, (7) X*W2, (8) W1*W2, (9) loglik
+/**
+ * M-Step under NCAR
+ * Input: Suff (sufficient statistics)
+ * (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) x*W1, (7) X*W2, (8) W1*W2, (9) loglik
+ * mutated (i.e., output): pdTheta, params
+ */
void ecoMStepNCAR(double* Suff, double* pdTheta, Param* params) {
setParam* setP=params[0].setP;
@@ -625,7 +643,7 @@ void ecoMStepNCAR(double* Suff, double* pdTheta, Param* params) {
}
numer[i][0]=0;
}
-//Rprintf("InvSigma %5g %5g %5g\n",InvSigma[0][0],InvSigma[1][1],InvSigma[0][1]);
+ //Rprintf("InvSigma %5g %5g %5g\n",InvSigma[0][0],InvSigma[1][1],InvSigma[0][1]);
for(ii=0;ii<setP->t_samp;ii++) {
double lx=logit(params[ii].caseP.X,"NCAR beta");
for(j=0;j<2;j++) {
@@ -653,8 +671,8 @@ void ecoMStepNCAR(double* Suff, double* pdTheta, Param* params) {
pdTheta[2]=numer[2][0]; //mu2
pdTheta[7]=numer[3][0]; //beta2
//pdTheta[8] is constant
-//Rprintf("Compare Suff1 %5g to pdT1 %5g \n",Suff[1],pdTheta[1]);
-//Rprintf("Compare Suff2 %5g to pdT2 %5g \n",Suff[2],pdTheta[2]);
+ //Rprintf("Compare Suff1 %5g to pdT1 %5g \n",Suff[1],pdTheta[1]);
+ //Rprintf("Compare Suff2 %5g to pdT2 %5g \n",Suff[2],pdTheta[2]);
if (setP->hypTest>0) {
MStepHypTest(params,pdTheta);
@@ -676,9 +694,11 @@ void ecoMStepNCAR(double* Suff, double* pdTheta, Param* params) {
Smat[0][0]=Suff[4] - 2*pdTheta[6]*(XW1 - pdTheta[0]*Suff[1]) + pdTheta[6]*pdTheta[6]*pdTheta[3]; //S_11
Smat[1][1]=Suff[5] - 2*pdTheta[7]*(XW2 - pdTheta[0]*Suff[2]) + pdTheta[7]*pdTheta[7]*pdTheta[3]; //S_22
Smat[0][1]=Suff[8] - pdTheta[6]*(XW2 - pdTheta[0]*Suff[2]) - pdTheta[7]*(XW1 - pdTheta[0]*Suff[1]) + pdTheta[6]*pdTheta[7]*pdTheta[3] ; //S_12
+
Tmat[0][0]=Smat[0][0] - S1*S1;
Tmat[1][1]=Smat[1][1] - S2*S2;
Tmat[0][1]=Smat[0][1] - S1*S2;
+
pdTheta[4]=(Tmat[0][0]-pdTheta[8]*Tmat[0][1]*pow(Tmat[0][0]/Tmat[1][1],0.5))/(1-pdTheta[8]*pdTheta[8]); //sigma11 | 3
pdTheta[5]=(Tmat[1][1]-pdTheta[8]*Tmat[0][1]*pow(Tmat[1][1]/Tmat[0][0],0.5))/(1-pdTheta[8]*pdTheta[8]); //sigma22 | 3
@@ -705,102 +725,106 @@ void ecoMStepNCAR(double* Suff, double* pdTheta, Param* params) {
if (setP->fixedRho) ncarFixedRhoUnTransform(pdTheta);
}
-//M-Step under CCAR
-void ecoMStepCCAR(double* Suff, double* pdTheta, Param* params) {
- setParam* setP=params[0].setP;
- int k=setP->ccar_nvar;
- int ii,i,j,verbose,t_samp;
- verbose=setP->verbose;
- t_samp=setP->t_samp;
- double **InvSigma=doubleMatrix(2,2);
- double **Z_i=doubleMatrix(k,2);
- double **Z_i_t=doubleMatrix(2,k);
- double **tmpk1=doubleMatrix(k,1);
- double **tmpk2=doubleMatrix(k,2);
- double **tmpkk=doubleMatrix(k,k);
- double **tmp21=doubleMatrix(2,1);
- double **tmp21_b=doubleMatrix(2,1);
- double **tmp12=doubleMatrix(1,2);
- double **tmp22=doubleMatrix(2,2);
- double **denom=doubleMatrix(k,k);
- double **numer=doubleMatrix(k,1);
-//betas
- for (i=0;i<k;i++) {
- for(j=0;j<k;j++) {
- if (j<2) {
- if (i<2) InvSigma[i][j]=setP->InvSigma[i][j];
- }
- denom[i][j]=0;
+/**
+ * M-Step under CCAR
+ * Input: params
+ * mutated (i.e., output): pdTheta, params
+ */
+void ecoMStepCCAR(double* pdTheta, Param* params) {
+ setParam* setP=params[0].setP;
+ int k=setP->ccar_nvar;
+ int ii,i,j,verbose,t_samp;
+ verbose=setP->verbose;
+ t_samp=setP->t_samp;
+ double **InvSigma=doubleMatrix(2,2);
+ double **Z_i=doubleMatrix(k,2);
+ double **Z_i_t=doubleMatrix(2,k);
+ double **tmpk1=doubleMatrix(k,1);
+ double **tmpk2=doubleMatrix(k,2);
+ double **tmpkk=doubleMatrix(k,k);
+ double **tmp21=doubleMatrix(2,1);
+ double **tmp21_b=doubleMatrix(2,1);
+ double **tmp12=doubleMatrix(1,2);
+ double **tmp22=doubleMatrix(2,2);
+ double **denom=doubleMatrix(k,k);
+ double **numer=doubleMatrix(k,1);
+ //betas
+ for (i=0;i<k;i++) {
+ for(j=0;j<k;j++) {
+ if (j<2) {
+ if (i<2) InvSigma[i][j]=setP->InvSigma[i][j];
}
- numer[i][0]=0;
+ denom[i][j]=0;
}
-//Rprintf("InvSigma %5g %5g %5g\n",InvSigma[0][0],InvSigma[1][1],InvSigma[0][1]);
- for(ii=0;ii<setP->t_samp;ii++) {
- for (i=0;i<k;i++) {
- for(j=0;j<k;j++) {
- Z_i[i][j]=params[ii].caseP.Z_i[i][j];
- Z_i_t[i][j]=params[ii].caseP.Z_i[j][i];
- }
+ numer[i][0]=0;
+ }
+ //Rprintf("InvSigma %5g %5g %5g\n",InvSigma[0][0],InvSigma[1][1],InvSigma[0][1]);
+ for(ii=0;ii<setP->t_samp;ii++) {
+ for (i=0;i<k;i++) {
+ for(j=0;j<k;j++) {
+ Z_i[i][j]=params[ii].caseP.Z_i[i][j];
+ Z_i_t[i][j]=params[ii].caseP.Z_i[j][i];
}
- matrixMul(Z_i,InvSigma,k,2,2,2,tmpk2);
- matrixMul(tmpk2,Z_i_t,k,2,2,k,tmpkk);
- for (i=0;i<k;i++)
- for(j=0;j<k;j++)
- denom[i][j]+=tmpkk[i][j];
- for (i=0;i<2;i++) tmp21[i][0]=params[ii].caseP.Wstar[i]; //Wstar
- matrixMul(tmpk2,tmp21,k,2,2,1,tmpk1);
- for (i=0;i<k;i++) numer[i][0]+=tmpk1[i][0];
}
- dinv(denom,k,denom);
- matrixMul(denom,numer,k,k,k,1,numer);
- for(i=0; i<k;i++) pdTheta[i]=numer[i][0]; //betas
+ matrixMul(Z_i,InvSigma,k,2,2,2,tmpk2);
+ matrixMul(tmpk2,Z_i_t,k,2,2,k,tmpkk);
+ for (i=0;i<k;i++)
+ for(j=0;j<k;j++)
+ denom[i][j]+=tmpkk[i][j];
+ for (i=0;i<2;i++) tmp21[i][0]=params[ii].caseP.Wstar[i]; //Wstar
+ matrixMul(tmpk2,tmp21,k,2,2,1,tmpk1);
+ for (i=0;i<k;i++) numer[i][0]+=tmpk1[i][0];
+ }
+ dinv(denom,k,denom);
+ matrixMul(denom,numer,k,k,k,1,numer);
+ for(i=0; i<k;i++) pdTheta[i]=numer[i][0]; //betas
- if (setP->hypTest>0) {
- MStepHypTest(params,pdTheta);
- }
+ if (setP->hypTest>0) {
+ MStepHypTest(params,pdTheta);
+ }
-//conditional Sigma
- //start at 0
- for(i=0; i<2;i++)
- for(j=0; j<2;j++)
- setP->Sigma[i][j] = 0;
+ //conditional Sigma
+ //start at 0
+ for(i=0; i<2;i++)
+ for(j=0; j<2;j++)
+ setP->Sigma[i][j] = 0;
- for(ii=0;ii<setP->t_samp;ii++) {
- for (i=0;i<k;i++) {
- for(j=0;j<k;j++) {
- Z_i_t[i][j]=params[ii].caseP.Z_i[j][i];
- }
+ for(ii=0;ii<setP->t_samp;ii++) {
+ for (i=0;i<k;i++) {
+ for(j=0;j<k;j++) {
+ Z_i_t[i][j]=params[ii].caseP.Z_i[j][i];
}
- matrixMul(Z_i_t,numer,2,k,k,1,tmp21_b);
- for (i=0;i<2;i++) tmp21[i][0]=params[ii].caseP.Wstar[i]; //Wstar
- for (i=0;i<2;i++) tmp21[i][0] = tmp21[i][0] - tmp21_b[i][0]; //Wstar - Z_t*B
- for (i=0;i<2;i++) tmp12[0][i] = tmp21[i][0]; //invserse
- matrixMul(tmp21,tmp12,2,1,1,2,tmp22);
- for(i=0; i<2;i++)
- for(j=0; j<2;j++)
- setP->Sigma[i][j] += tmp22[i][j];
}
- dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"CCAR M-step S2");
-
- //variances
- //CODE BLOCK B
- setP->Sigma3[0][0] = pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3];
- setP->Sigma3[1][1] = pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3];
- setP->Sigma3[2][2] = pdTheta[3];
-
- //covariances
- setP->Sigma3[0][1] = (pdTheta[8]*sqrt(pdTheta[4]*pdTheta[5]) + pdTheta[6]*pdTheta[7]*pdTheta[3])/
- (sqrt((pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3])*(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3])));//rho_12 unconditional
- setP->Sigma3[0][1] = setP->Sigma3[0][1]*sqrt(setP->Sigma3[0][0]*setP->Sigma3[1][1]); //sig_12
- setP->Sigma3[0][2] = pdTheta[6]*sqrt((pdTheta[3])/(pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3]))*sqrt(setP->Sigma3[0][0]*setP->Sigma3[2][2]);
- setP->Sigma3[1][2] = pdTheta[7]*sqrt((pdTheta[3])/(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3]))*sqrt(setP->Sigma3[1][1]*setP->Sigma3[2][2]);
-
- //symmetry
- setP->Sigma3[1][0] = setP->Sigma3[0][1];
- setP->Sigma3[2][0] = setP->Sigma3[0][2];
- setP->Sigma3[2][1] = setP->Sigma3[1][2];
+ matrixMul(Z_i_t,numer,2,k,k,1,tmp21_b);
+ for (i=0;i<2;i++) tmp21[i][0]=params[ii].caseP.Wstar[i]; //Wstar
+ for (i=0;i<2;i++) tmp21[i][0] = tmp21[i][0] - tmp21_b[i][0]; //Wstar - Z_t*B
+ for (i=0;i<2;i++) tmp12[0][i] = tmp21[i][0]; //invserse
+ matrixMul(tmp21,tmp12,2,1,1,2,tmp22);
+ for(i=0; i<2;i++)
+ for(j=0; j<2;j++)
+ setP->Sigma[i][j] += tmp22[i][j];
+ }
+ dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"CCAR M-step S2");
+
+ //variances
+ //CODE BLOCK B
+ setP->Sigma3[0][0] = pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3];
+ setP->Sigma3[1][1] = pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3];
+ setP->Sigma3[2][2] = pdTheta[3];
+
+ //covariances
+ setP->Sigma3[0][1] = (pdTheta[8]*sqrt(pdTheta[4]*pdTheta[5]) + pdTheta[6]*pdTheta[7]*pdTheta[3])/
+ (sqrt((pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3])*(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3])));//rho_12 unconditional
+ setP->Sigma3[0][1] = setP->Sigma3[0][1]*sqrt(setP->Sigma3[0][0]*setP->Sigma3[1][1]); //sig_12
+ setP->Sigma3[0][2] = pdTheta[6]*sqrt((pdTheta[3])/(pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3]))*sqrt(setP->Sigma3[0][0]*setP->Sigma3[2][2]);
+ setP->Sigma3[1][2] = pdTheta[7]*sqrt((pdTheta[3])/(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3]))*sqrt(setP->Sigma3[1][1]*setP->Sigma3[2][2]);
+
+ //symmetry
+ setP->Sigma3[1][0] = setP->Sigma3[0][1];
+ setP->Sigma3[2][0] = setP->Sigma3[0][2];
+ setP->Sigma3[2][1] = setP->Sigma3[1][2];
dinv2D((double*)(&(setP->Sigma3[0][0])), 3, (double*)(&(setP->InvSigma3[0][0])),"NCAR M-step S3");
initNCAR(params,pdTheta);
@@ -809,6 +833,7 @@ void ecoMStepCCAR(double* Suff, double* pdTheta, Param* params) {
/**
* Exta M-Step for hypothesis testing
+ * Input: params
* Mutates pdTheta
*/
void MStepHypTest(Param* params, double* pdTheta) {
@@ -854,19 +879,23 @@ void MStepHypTest(Param* params, double* pdTheta) {
//offset theta
for(k=0;k<2;k++) {
- offset=temp_DbyL[k][0]/denom;
- int kindex= (setP->ncar) ? (k+1) : k;
- pdTheta[kindex]=pdTheta[kindex]-offset;
+ offset=temp_DbyL[k][0]/denom;
+ int kindex= (setP->ncar) ? (k+1) : k;
+ pdTheta[kindex]=pdTheta[kindex]-offset;
}
}
-//NCAR initialize
-//note that for fixed rho, the input is the UNTRANSFORMED PARAMETERS
+/**
+ * NCAR initialize
+ * note that for fixed rho, the input is the UNTRANSFORMED PARAMETERS
+ * input: pdTheta
+ * mutates: params
+ */
void initNCAR(Param* params, double* pdTheta) {
setParam* setP=params[0].setP;
- int i;
+ int i;
if (!setP->fixedRho) { //variable rho
//reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12
@@ -876,6 +905,7 @@ void initNCAR(Param* params, double* pdTheta) {
setP->Sigma[0][1]= setP->Sigma[0][1]*sqrt(setP->Sigma[0][0]*setP->Sigma[1][1]); //covar
setP->Sigma[1][0]= setP->Sigma[0][1]; //symmetry
dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"NCAR M-step S2");
+
//assign each data point the new mu (different for each point)
for(i=0;i<setP->t_samp;i++) {
params[i].caseP.mu[0]=pdTheta[1] + pdTheta[6]*sqrt(pdTheta[4]/pdTheta[3])*(logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]);
@@ -905,10 +935,15 @@ void initNCAR(Param* params, double* pdTheta) {
}
}
-//CCAR initialize
+/**
+ * CCAR initialize
+ * Note that fixed rho is currently unimplemented
+ * input: pdTheta
+ * mutates: params
+ */
void initCCAR(Param* params, double* pdTheta) {
setParam* setP=params[0].setP;
- int i;
+ int i;
if (!setP->fixedRho) { //variable rho
//reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12
@@ -931,173 +966,180 @@ void initCCAR(Param* params, double* pdTheta) {
}
}
-/*
+/**
* input: optTheta,pdTheta,params,Rmat
- * output: param_lenxparam_len matrices Rmat and Rmat_old
+ * mutate/output: matrices Rmat and Rmat_old (dimensions of param_len x param_len)
* optTheta is optimal theta
* pdTheta is current theta
* Rmat_old contains the input Rmat
*/
void ecoSEM(double* optTheta, double* pdTheta, Param* params, double Rmat_old[7][7], double Rmat[7][7]) {
- //assume we have optTheta, ie \hat{phi}
- //pdTheta is phi^{t+1}
- int i,j,verbose,len,param_len;
- setParam setP_sem=*(params[0].setP);
- param_len=setP_sem.param_len;
- double *SuffSem=doubleArray(setP_sem.suffstat_len+1); //sufficient stats
- double phiTI[param_len]; //phi^t_i
- double phiTp1I[param_len]; //phi^{t+1}_i
- double t_optTheta[param_len]; //transformed optimal
- double t_phiTI[param_len]; //transformed phi^t_i
- double t_phiTp1I[param_len]; //transformed phi^{t+1}_i
- Param* params_sem=(Param*) Calloc(params->setP->t_samp,Param);
- verbose=setP_sem.verbose;
- //determine length of R matrix
- len=0;
- for(j=0; j<param_len;j++)
- if(setP_sem.varParam[j]) len++;
-
- //first, save old Rmat
- for(i=0;i<len;i++)
- for(j=0;j<len;j++)
- Rmat_old[i][j]=Rmat[i][j];
-
- for(i=0;i<len;i++) {
- if (!setP_sem.semDone[i]) { //we're not done with this row
- //step 1: set phi^t_i
- if (verbose>=2) Rprintf("Theta(%d):",(i+1));
- int switch_index_ir=0; int switch_index_it;
- for(j=0;j<param_len;j++) {
- if (!setP_sem.varParam[j]) //const
- phiTI[j]=optTheta[j];
- else {
- if (i==switch_index_ir) {
- phiTI[j]=pdTheta[j]; //current value
- switch_index_it=j;
- }
- else phiTI[j]=optTheta[j]; //optimal value
- switch_index_ir++;
+ //assume we have optTheta, ie \hat{phi}
+ //pdTheta is phi^{t+1}
+ int i,j,verbose,len,param_len;
+ setParam setP_sem=*(params[0].setP);
+ param_len=setP_sem.param_len;
+ double *SuffSem=doubleArray(setP_sem.suffstat_len+1); //sufficient stats
+ double phiTI[param_len]; //phi^t_i
+ double phiTp1I[param_len]; //phi^{t+1}_i
+ double t_optTheta[param_len]; //transformed optimal
+ double t_phiTI[param_len]; //transformed phi^t_i
+ double t_phiTp1I[param_len]; //transformed phi^{t+1}_i
+ Param* params_sem=(Param*) Calloc(params->setP->t_samp,Param);
+ verbose=setP_sem.verbose;
+ //determine length of R matrix
+ len=0;
+ for(j=0; j<param_len;j++)
+ if(setP_sem.varParam[j]) len++;
+
+ //first, save old Rmat
+ for(i=0;i<len;i++)
+ for(j=0;j<len;j++)
+ Rmat_old[i][j]=Rmat[i][j];
+
+ for(i=0;i<len;i++) {
+ if (!setP_sem.semDone[i]) { //we're not done with this row
+ //step 1: set phi^t_i
+ if (verbose>=2) Rprintf("Theta(%d):",(i+1));
+ int switch_index_ir=0; int switch_index_it;
+ for(j=0;j<param_len;j++) {
+ if (!setP_sem.varParam[j]) //const
+ phiTI[j]=optTheta[j];
+ else {
+ if (i==switch_index_ir) {
+ phiTI[j]=pdTheta[j]; //current value
+ switch_index_it=j;
}
- if (verbose>=2) Rprintf(" %5g ", phiTI[j]);
+ else phiTI[j]=optTheta[j]; //optimal value
+ switch_index_ir++;
}
- //if (setP_sem.fixedRho) {
- // phiTI[len-1]=pdTheta[len-1];
- // phiTp1I[len-1]=pdTheta[len-1];
- // if (verbose>=2) Rprintf(" %5g ", phiTI[len-1]);
- //}
- if (verbose>=2) Rprintf("\n");
- for(j=0;j<param_len;j++) phiTp1I[j]=phiTI[j]; //init next iteration
-
- //step 2: run an E-step and an M-step with phi^t_i
- //initialize params
- if (!setP_sem.ncar) {
- for(j=0;j<setP_sem.t_samp;j++) {
- params_sem[j].setP=&setP_sem;
- params_sem[j].caseP=params[j].caseP;
- params_sem[j].caseP.mu[0] = phiTI[0];
- params_sem[j].caseP.mu[1] = phiTI[1];
- }
- setP_sem.Sigma[0][0] = phiTI[2];
- setP_sem.Sigma[1][1] = phiTI[3];
- setP_sem.Sigma[0][1] = phiTI[4]*sqrt(phiTI[2]*phiTI[3]);
- setP_sem.Sigma[1][0] = setP_sem.Sigma[0][1];
- dinv2D((double*)(&(setP_sem.Sigma[0][0])), 2, (double*)(&(setP_sem.InvSigma[0][0])), "SEM: CAR init ");
+ if (verbose>=2) Rprintf(" %5g ", phiTI[j]);
+ }
+ //if (setP_sem.fixedRho) {
+ // phiTI[len-1]=pdTheta[len-1];
+ // phiTp1I[len-1]=pdTheta[len-1];
+ // if (verbose>=2) Rprintf(" %5g ", phiTI[len-1]);
+ //}
+ if (verbose>=2) Rprintf("\n");
+ for(j=0;j<param_len;j++) phiTp1I[j]=phiTI[j]; //init next iteration
+
+ //step 2: run an E-step and an M-step with phi^t_i
+ //initialize params
+ if (!setP_sem.ncar) {
+ for(j=0;j<setP_sem.t_samp;j++) {
+ params_sem[j].setP=&setP_sem;
+ params_sem[j].caseP=params[j].caseP;
+ params_sem[j].caseP.mu[0] = phiTI[0];
+ params_sem[j].caseP.mu[1] = phiTI[1];
}
- else {
- for(j=0;j<setP_sem.t_samp;j++) {
- params_sem[j].setP=&setP_sem;
- params_sem[j].caseP=params[j].caseP;
- }
- setP_sem.Sigma3[0][0] = phiTI[4];
- setP_sem.Sigma3[1][1] = phiTI[5];
- setP_sem.Sigma3[2][2] = phiTI[3];
-
- //covariances
- setP_sem.Sigma3[0][1] = phiTI[8]*sqrt(phiTI[4]*phiTI[5]);
- setP_sem.Sigma3[0][2] = phiTI[6]*sqrt(phiTI[4]*phiTI[3]);
- setP_sem.Sigma3[1][2] = phiTI[7]*sqrt(phiTI[5]*phiTI[3]);
-
- //symmetry
- setP_sem.Sigma3[1][0] = setP_sem.Sigma3[0][1];
- setP_sem.Sigma3[2][0] = setP_sem.Sigma3[0][2];
- setP_sem.Sigma3[2][1] = setP_sem.Sigma3[1][2];
- if (verbose>=2) {
- Rprintf("Sigma3: %5g %5g %5g %5g %5g %5g; %5g %5g\n",setP_sem.Sigma3[0][0],setP_sem.Sigma3[0][1],setP_sem.Sigma3[1][1],setP_sem.Sigma3[0][2],setP_sem.Sigma3[1][2],setP_sem.Sigma3[2][2],*(&(setP_sem.Sigma3[0][0])+0),*(&(setP_sem.Sigma3[0][0])+8));
- }
- dinv2D((double*)(&(setP_sem.Sigma3[0][0])), 3, (double*)(&(setP_sem.InvSigma3[0][0])),"SEM: NCAR Sig3 init");
- if (verbose>=2) {
- Rprintf("Check 1");
- }
- if (setP_sem.fixedRho) ncarFixedRhoTransform(phiTI);
- initNCAR(params_sem,phiTI);
- if (setP_sem.fixedRho) ncarFixedRhoUnTransform(phiTI);
- if (verbose>=2) {
- Rprintf("Check 2");
- }
+ setP_sem.Sigma[0][0] = phiTI[2];
+ setP_sem.Sigma[1][1] = phiTI[3];
+ setP_sem.Sigma[0][1] = phiTI[4]*sqrt(phiTI[2]*phiTI[3]);
+ setP_sem.Sigma[1][0] = setP_sem.Sigma[0][1];
+ dinv2D((double*)(&(setP_sem.Sigma[0][0])), 2, (double*)(&(setP_sem.InvSigma[0][0])), "SEM: CAR init ");
+ }
+ else {
+ for(j=0;j<setP_sem.t_samp;j++) {
+ params_sem[j].setP=&setP_sem;
+ params_sem[j].caseP=params[j].caseP;
+ }
+ setP_sem.Sigma3[0][0] = phiTI[4];
+ setP_sem.Sigma3[1][1] = phiTI[5];
+ setP_sem.Sigma3[2][2] = phiTI[3];
+
+ //covariances
+ setP_sem.Sigma3[0][1] = phiTI[8]*sqrt(phiTI[4]*phiTI[5]);
+ setP_sem.Sigma3[0][2] = phiTI[6]*sqrt(phiTI[4]*phiTI[3]);
+ setP_sem.Sigma3[1][2] = phiTI[7]*sqrt(phiTI[5]*phiTI[3]);
+
+ //symmetry
+ setP_sem.Sigma3[1][0] = setP_sem.Sigma3[0][1];
+ setP_sem.Sigma3[2][0] = setP_sem.Sigma3[0][2];
+ setP_sem.Sigma3[2][1] = setP_sem.Sigma3[1][2];
+ if (verbose>=2) {
+ Rprintf("Sigma3: %5g %5g %5g %5g %5g %5g; %5g %5g\n",setP_sem.Sigma3[0][0],setP_sem.Sigma3[0][1],setP_sem.Sigma3[1][1],setP_sem.Sigma3[0][2],setP_sem.Sigma3[1][2],setP_sem.Sigma3[2][2],*(&(setP_sem.Sigma3[0][0])+0),*(&(setP_sem.Sigma3[0][0])+8));
+ }
+ dinv2D((double*)(&(setP_sem.Sigma3[0][0])), 3, (double*)(&(setP_sem.InvSigma3[0][0])),"SEM: NCAR Sig3 init");
+ if (verbose>=2) {
+ Rprintf("Check 1");
}
+ if (setP_sem.fixedRho) ncarFixedRhoTransform(phiTI);
+ initNCAR(params_sem,phiTI);
+ if (setP_sem.fixedRho) ncarFixedRhoUnTransform(phiTI);
+ if (verbose>=2) {
+ Rprintf("Check 2");
+ }
+ }
- //if (verbose>=2) {
- // Rprintf("Sigma: %5g %5g %5g %5g\n",setP_sem.Sigma[0][0],setP_sem.Sigma[0][1],setP_sem.Sigma[1][0],setP_sem.Sigma[1][1]);
- //}
-
- ecoEStep(params_sem, SuffSem);
- if (!params[0].setP->ncar)
- ecoMStep(SuffSem,phiTp1I,params_sem);
- else
- ecoMStepNCAR(SuffSem,phiTp1I,params_sem);
-
- //step 3: create new R matrix row
- transformTheta(phiTp1I,t_phiTp1I,setP_sem.param_len,&setP_sem);
- transformTheta(optTheta,t_optTheta,setP_sem.param_len,&setP_sem);
- transformTheta(phiTI,t_phiTI,setP_sem.param_len,&setP_sem);
- /*if (verbose>=2) {
- Rprintf("T+1:");
- for (j=0;j<param_len;j++) Rprintf(" %5g ", phiTp1I[j]);
- Rprintf("\nOpt:");
- for (j=0;j<param_len;j++) Rprintf(" %5g ", optTheta[j]);
- Rprintf("\n 2nd item: %5g %5g %5g %5g", t_phiTp1I[2], t_optTheta[2], t_phiTI[switch_index_it], t_optTheta[switch_index_it]);
- }*/
- int index_jr=0;
- for(j = 0; j<param_len; j++) {
- if (setP_sem.varParam[j]) {
- Rmat[i][index_jr]=(t_phiTp1I[j]-t_optTheta[j])/(t_phiTI[switch_index_it]-t_optTheta[switch_index_it]);
- index_jr++;
- }
+ //if (verbose>=2) {
+ // Rprintf("Sigma: %5g %5g %5g %5g\n",setP_sem.Sigma[0][0],setP_sem.Sigma[0][1],setP_sem.Sigma[1][0],setP_sem.Sigma[1][1]);
+ //}
+
+ ecoEStep(params_sem, SuffSem);
+ if (!params[0].setP->ncar)
+ ecoMStep(SuffSem,phiTp1I,params_sem);
+ else
+ ecoMStepNCAR(SuffSem,phiTp1I,params_sem);
+
+ //step 3: create new R matrix row
+ transformTheta(phiTp1I,t_phiTp1I,setP_sem.param_len,&setP_sem);
+ transformTheta(optTheta,t_optTheta,setP_sem.param_len,&setP_sem);
+ transformTheta(phiTI,t_phiTI,setP_sem.param_len,&setP_sem);
+ /*if (verbose>=2) {
+ Rprintf("T+1:");
+ for (j=0;j<param_len;j++) Rprintf(" %5g ", phiTp1I[j]);
+ Rprintf("\nOpt:");
+ for (j=0;j<param_len;j++) Rprintf(" %5g ", optTheta[j]);
+ Rprintf("\n 2nd item: %5g %5g %5g %5g", t_phiTp1I[2], t_optTheta[2], t_phiTI[switch_index_it], t_optTheta[switch_index_it]);
+ }*/
+ int index_jr=0;
+ for(j = 0; j<param_len; j++) {
+ if (setP_sem.varParam[j]) {
+ Rmat[i][index_jr]=(t_phiTp1I[j]-t_optTheta[j])/(t_phiTI[switch_index_it]-t_optTheta[switch_index_it]);
+ index_jr++;
}
+ }
- //step 4: check for difference
- params[0].setP->semDone[i]=closeEnough((double*)Rmat[i],(double*)Rmat_old[i],len,sqrt(params[0].setP->convergence));
+ //step 4: check for difference
+ params[0].setP->semDone[i]=closeEnough((double*)Rmat[i],(double*)Rmat_old[i],len,sqrt(params[0].setP->convergence));
- }
- else { //keep row the same
- for(j = 0; j<len; j++)
- Rmat[i][j]=Rmat_old[i][j];
- }
}
- if(verbose>=1) {
- for(i=0;i<len;i++) {
- Rprintf("\nR Matrix row %d (%s): ", (i+1), (params[0].setP->semDone[i]) ? " Done" : "Not done");
- for(j=0;j<len;j++) {
- Rprintf(" %5.2f ",Rmat[i][j]);
- }
+ else { //keep row the same
+ for(j = 0; j<len; j++)
+ Rmat[i][j]=Rmat_old[i][j];
+ }
+ }
+ if(verbose>=1) {
+ for(i=0;i<len;i++) {
+ Rprintf("\nR Matrix row %d (%s): ", (i+1), (params[0].setP->semDone[i]) ? " Done" : "Not done");
+ for(j=0;j<len;j++) {
+ Rprintf(" %5.2f ",Rmat[i][j]);
}
- Rprintf("\n\n");
}
- Free(SuffSem);
- Free(params_sem);
- }
+ Rprintf("\n\n");
+ }
+ Free(SuffSem);
+ Free(params_sem);
+}
-/*
+/**
* Read in the data set and population params
+ * inputs:
+ * ndim: number of dimensions
+ * pdX: non-survey, non-homogenous data (length n_samp)
+ * sur_W: survey data (length s_samp)
+ * x1_W1: homogenous data (X==1) (length x1_samp)
+ * x0_W2: homogenous data (X==0) (length x0_samp)
+ * mutates: params
*/
void readData(Param* params, int n_dim, double* pdX, double* sur_W, double* x1_W1, double* x0_W2,
int n_samp, int s_samp, int x1_samp, int x0_samp) {
/* read the data set */
-int itemp,i,j,surv_dim;
-double dtemp;
-setParam* setP=params[0].setP;
+ int itemp,i,j,surv_dim;
+ double dtemp;
+ setParam* setP=params[0].setP;
/** Packing Y, X **/
itemp = 0;
@@ -1107,7 +1149,7 @@ setParam* setP=params[0].setP;
}
for (i = 0; i < n_samp; i++) {
- params[i].caseP.dataType=0;
+ params[i].caseP.dataType=DPT_General;
params[i].caseP.X=params[i].caseP.data[0];
params[i].caseP.Y=params[i].caseP.data[1];
//fix X edge cases
@@ -1116,51 +1158,84 @@ setParam* setP=params[0].setP;
params[i].caseP.Y=(params[i].caseP.Y >= 1) ? .9999 : ((params[i].caseP.Y <= 0) ? 0.0001 : params[i].caseP.Y);
}
- /*read homeogenous areas information */
- for (i=n_samp; i<n_samp+x1_samp; i++) {
- params[i].caseP.dataType=1;
- params[i].caseP.W[0]=(x1_W1[i] == 1) ? .9999 : ((x1_W1[i]==0) ? .0001 : x1_W1[i]);
- params[i].caseP.Wstar[0]=logit(params[i].caseP.W[0],"X1 read");
+ /*read the survey data */
+ itemp=0;
+ surv_dim=n_dim + (setP->ncar ? 1 : 0); //if NCAR, the survey data will include X's
+ for (j=0; j<surv_dim; j++) {
+ for (i=n_samp; i<n_samp+s_samp; i++) {
+ dtemp=sur_W[itemp++];
+ params[i].caseP.dataType=DPT_Survey;
+ if (j<n_dim) {
+ params[i].caseP.W[j]=(dtemp == 1) ? .9999 : ((dtemp==0) ? .0001 : dtemp);
+ params[i].caseP.Wstar[j]=logit(params[i].caseP.W[j],"Survey read");
+ }
+ else { //if given the X (NCAR), we set the X and contruct Y
+ params[i].caseP.X=(dtemp == 1) ? .9999 : ((dtemp==0) ? .0001 : dtemp);
+ params[i].caseP.Y=params[i].caseP.X*params[i].caseP.W[0]+(1-params[i].caseP.X);
+ }
}
+ }
- for (i=n_samp+x1_samp; i<n_samp+x1_samp+x0_samp; i++) {
- params[i].caseP.dataType=2;
- params[i].caseP.W[1]=(x0_W2[i] == 1) ? .9999 : ((x0_W2[i]==0) ? .0001 : x0_W2[i]);
- params[i].caseP.Wstar[1]=logit(params[i].caseP.W[1],"X0 read");
- }
+ /*read homeogenous areas information */
+ for (i=n_samp+s_samp; i<n_samp+s_samp+x1_samp; i++) {
+ /*params[i].caseP.dataType=DPT_Homog_X1;
+ params[i].caseP.W[0]=(x1_W1[i] == 1) ? .9999 : ((x1_W1[i]==0) ? .0001 : x1_W1[i]);
+ params[i].caseP.Wstar[0]=logit(params[i].caseP.W[0],"X1 read");*/
+ }
+ for (i=n_samp+s_samp+x1_samp; i<n_samp+s_samp+x1_samp+x0_samp; i++) {
+ /*params[i].caseP.dataType=DPT_Homog_X0;
+ params[i].caseP.W[1]=(x0_W2[i] == 1) ? .9999 : ((x0_W2[i]==0) ? .0001 : x0_W2[i]);
+ params[i].caseP.Wstar[1]=logit(params[i].caseP.W[1],"X0 read");*/
+ }
- /*read the survey data */
- itemp=0;
- surv_dim=n_dim + (setP->ncar ? 1 : 0); //if NCAR, the survey data will include X's
- for (j=0; j<surv_dim; j++) {
- for (i=n_samp+x1_samp+x0_samp; i<n_samp+x1_samp+x0_samp+s_samp; i++) {
- dtemp=sur_W[itemp++];
- params[i].caseP.dataType=3;
- if (j<n_dim) {
- params[i].caseP.W[j]=(dtemp == 1) ? .9999 : ((dtemp==0) ? .0001 : dtemp);
- params[i].caseP.Wstar[j]=logit(params[i].caseP.W[j],"Survey read");
- }
- else { //if given the X (NCAR), we set the X and contruct Y
- params[i].caseP.X=(dtemp == 1) ? .9999 : ((dtemp==0) ? .0001 : dtemp);
- params[i].caseP.Y=params[i].caseP.X*params[i].caseP.W[0]+(1-params[i].caseP.X);
- }
- }
- }
+ /*Current version of program does not handle homogenous data*/
+ if ((x1_samp+x0_samp)>0) {
+ printf("WARNING: Homogenous data is ignored and not handled by the current version of eco.");
+ }
- if (setP->verbose>=2) {
- Rprintf("Y X\n");
- for(i=0;i<5;i++) Rprintf("%5d%14g%14g\n",i,params[i].caseP.Y,params[i].caseP.X);
- if (s_samp>0) {
- Rprintf("SURVEY data\nY X\n");
- int s_max=fmin2(n_samp+x1_samp+x0_samp+s_samp,n_samp+x1_samp+x0_samp+5);
- for(i=n_samp+x1_samp+x0_samp; i<s_max; i++) Rprintf("%5d%14g%14g\n",i,params[i].caseP.Y,params[i].caseP.X);
- }
+
+ if (setP->verbose>=2) {
+ Rprintf("Y X\n");
+ for(i=0;i<5;i++) Rprintf("%5d%14g%14g\n",i,params[i].caseP.Y,params[i].caseP.X);
+ if (s_samp>0) {
+ Rprintf("SURVEY data\nY X\n");
+ int s_max=fmin2(n_samp+x1_samp+x0_samp+s_samp,n_samp+x1_samp+x0_samp+5);
+ for(i=n_samp+x1_samp+x0_samp; i<s_max; i++) Rprintf("%5d%14g%14g\n",i,params[i].caseP.Y,params[i].caseP.X);
}
+ }
- }
+}
-/*
+/**
+ * During the main E-M loop, this function prints the output column headers
+ * finalTheta: 1 if this is for the final theta -- include static variables
+ **/
+void printColumnHeader(int main_loop, int iteration_max, setParam* setP, int finalTheta) {
+ int i;
+ int param_len;
+ param_len = setP->param_len;
+
+ char temp[50]; int hlen;
+ if (!finalTheta) hlen=sprintf(temp, "cycle %d/%d:",main_loop,iteration_max); //Length of cycle text
+ else hlen=sprintf(temp, "Final Theta:");
+ for (i=0;i<hlen;i++) Rprintf(" ");
+
+ if (param_len<=5) { //CAR
+ Rprintf(" mu_1 mu_2 sig_1 sig_2");
+ if (!setP->fixedRho || finalTheta) Rprintf(" r_12");
+ } else { //NCAR
+ if (finalTheta) {
+ Rprintf(" mu_3 mu_1 mu_2 sig_3 sig_1 sig_2 r_13 r_23 r_12");
+ }
+ else {
+ Rprintf(" mu_1 mu_2 sig_1 sig_2 r_13 r_23 r_12");
+ }
+ }
+ Rprintf("\n");
+}
+
+/**
* Parameterizes the elements of theta
* Input: pdTheta
* Mutates: t_pdTheta
@@ -1186,6 +1261,11 @@ void transformTheta(double* pdTheta, double* t_pdTheta, int len, setParam* setP)
}
}
+/**
+ * Un-parameterizes the elements of theta
+ * Input: t_pdTheta
+ * Mutates: pdTheta
+ */
void untransformTheta(double* t_pdTheta,double* pdTheta, int len, setParam* setP) {
if (len<=5) {
pdTheta[0]=t_pdTheta[0];
@@ -1214,7 +1294,7 @@ void untransformTheta(double* t_pdTheta,double* pdTheta, int len, setParam* setP
}
/**
- * untransforms theta under ncar
+ * untransforms theta under ncar -- fixed rho
* input reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3
* output reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12
* mutates: pdTheta
@@ -1236,7 +1316,7 @@ void ncarFixedRhoUnTransform(double* pdTheta) {
}
/**
- * transforms theta under ncar
+ * transforms theta under ncar -- fixed rho
* input reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12
* output reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3
* mutates: pdTheta
@@ -1263,26 +1343,14 @@ void ncarFixedRhoTransform(double* pdTheta) {
* Mutates: history_full
**/
void setHistory(double* t_pdTheta, double loglik, int iter,setParam* setP,double history_full[][10]) {
- //calc len
- /*if you don't want to record the contant m3 and s3 in ncar use the code commented out*/
- /*int i,j;
- int len=0;
- for(j=0; j<setP->param_len;j++)
- if(setP->varParam[j]) len++;
- i=0;
- for(j=0;j<setP->param_len;j++)
- if(setP->varParam[j]) {
- history_full[iter][i]=t_pdTheta[j];
- i++;
- }*/
int len=setP->param_len;
int j;
for(j=0;j<len;j++)
- history_full[iter][j]=t_pdTheta[j];
+ history_full[iter][j]=t_pdTheta[j];
if (iter>0) history_full[iter-1][len]=loglik;
}
-/*
+/**
* Determines whether we have converged
* Takes in the current and old (one step previous) array of theta values
* maxerr is the maximum difference two corresponding values can have before the
@@ -1295,6 +1363,9 @@ int closeEnough(double* pdTheta, double* pdTheta_old, int len, double maxerr) {
return 1;
}
+/**
+ * Is the SEM process completely done.
+ **/
int semDoneCheck(setParam* setP) {
int varlen=0; int j;
for(j=0; j<setP->param_len;j++)
@@ -1304,6 +1375,9 @@ int semDoneCheck(setParam* setP) {
return 1;
}
+/**
+ * Older function. No longer used.
+ **/
void gridEStep(Param* params, int n_samp, int s_samp, int x1_samp, int x0_samp, double* suff, int verbose, double minW1, double maxW1) {
int n_dim=2;
diff --git a/src/macros.h b/src/macros.h
index 109a193..4a337d1 100644
--- a/src/macros.h
+++ b/src/macros.h
@@ -5,7 +5,17 @@
/****************/
/** structrues **/
/****************/
-/* parameters and observed data */
+
+/* ENUMS
+ * sufficient statistic to calculate: 0->W1*, 1->W2*, 2->(W1*)^2, 3->(W1*)(W2*), 4->(W2*)^2, 5->W1,6->W2 7->Log Lik,8->test
+ * data point type: 0=general, 1= homogenous with (X==1), 2= homogenous with (X==0), 3=survey (W1 and W2 are known)
+ */
+ enum e_sufficient_stats {SS_W1star, SS_W2star, SS_W1star2, SS_W1W2star, SS_W2star2, SS_W1, SS_W2, SS_Loglik, SS_Test};
+ typedef enum e_sufficient_stats sufficient_stat;
+ enum e_datapoint_types {DPT_General,DPT_Homog_X1, DPT_Homog_X0, DPT_Survey};
+ typedef enum e_datapoint_types datapoint_type;
+
+/* parameters and observed data -- no longer used*/
struct Param_old{
double mu[2];
double Sigma[2][2];
@@ -23,13 +33,14 @@ struct Param_old{
double W1_ub;
double W2_lb;
double W2_ub;
- int W1_inf; //inf: 0->(lb,ub), -1->(-inf,ub), 1->(lb,inf), 2->(-inf,inf)
- int W2_inf;
- int suff; //the sufficient stat we're calculating: 0->W1, 1->W2,2->W1^2,3->W1W2,4->W2^2,7->Log Lik, 5/6,-1 ->test case
+ sufficient_stat suff; //the sufficient stat we're calculating: 0->W1, 1->W2,2->W1^2,3->W1W2,4->W2^2,7->Log Lik, 5/6,-1 ->test case
};
typedef struct Param_old Param_old;
+/**
+ * The structure that holds per-record infromation
+ */
struct caseParam {
double mu[2];
double data[2]; //collect the data
@@ -40,12 +51,15 @@ struct caseParam {
double Wstar[2]; //place to store E[W1*] when we calculate it each step
double Wbounds[2][2]; //[i][j] is {j:lower,upper}-bound of W{i+1}
int suff; //the sufficient stat we're calculating: 0->W1, 1->W2,2->W1^2,3->W1W2,4->W2^2,7->Log Lik, 5/6,-1 ->test case
- int dataType; //0=unknown, 1=(X==1),2=(X==0),3=survey
+ datapoint_type dataType;
double** Z_i; //CCAR: k x 2
};
typedef struct caseParam caseParam;
+/**
+ * The structure that holds dataset infromation
+ */
struct setParam {
int n_samp, t_samp, s_samp,x1_samp,x0_samp,param_len,suffstat_len; //types of data sizes
int iter, ncar, ccar, ccar_nvar, fixedRho, sem, hypTest, verbose, calcLoglik; //options
diff --git a/src/rand.c b/src/rand.c
index 3e34d66..39c42ed 100644
--- a/src/rand.c
+++ b/src/rand.c
@@ -203,52 +203,52 @@ double dBVNtomo(double *Wstar, /* Wstar values */
double density;
double rho, dtemp;
- Param *param=(Param *)pp;
- MEAN[0]=param->caseP.mu[0];
- MEAN[1]=param->caseP.mu[1];
- SIGMA[0][0]=param->setP->Sigma[0][0];
- SIGMA[1][1]=param->setP->Sigma[1][1];
- SIGMA[0][1]=param->setP->Sigma[0][1];
- SIGMA[1][0]=param->setP->Sigma[1][0];
+ Param *param=(Param *)pp;
+ MEAN[0]=param->caseP.mu[0];
+ MEAN[1]=param->caseP.mu[1];
+ SIGMA[0][0]=param->setP->Sigma[0][0];
+ SIGMA[1][1]=param->setP->Sigma[1][1];
+ SIGMA[0][1]=param->setP->Sigma[0][1];
+ SIGMA[1][0]=param->setP->Sigma[1][0];
- rho=SIGMA[0][1]/sqrt(SIGMA[0][0]*SIGMA[1][1]);
- dtemp=1/(2*M_PI*sqrt(SIGMA[0][0]*SIGMA[1][1]*(1-rho*rho)));
+ rho=SIGMA[0][1]/sqrt(SIGMA[0][0]*SIGMA[1][1]);
+ dtemp=1/(2*M_PI*sqrt(SIGMA[0][0]*SIGMA[1][1]*(1-rho*rho)));
- density=-1/(2*(1-rho*rho))*
- ((Wstar[0]-MEAN[0])*(Wstar[0]-MEAN[0])/SIGMA[0][0]+
- +(Wstar[1]-MEAN[1])*(Wstar[1]-MEAN[1])/SIGMA[1][1]
- -2*rho*(Wstar[0]-MEAN[0])*(Wstar[1]-MEAN[1])/sqrt(SIGMA[0][0]*SIGMA[1][1]))
- +log(dtemp)-log(normc);
+ density=-1/(2*(1-rho*rho))*
+ ((Wstar[0]-MEAN[0])*(Wstar[0]-MEAN[0])/SIGMA[0][0]+
+ +(Wstar[1]-MEAN[1])*(Wstar[1]-MEAN[1])/SIGMA[1][1]
+ -2*rho*(Wstar[0]-MEAN[0])*(Wstar[1]-MEAN[1])/sqrt(SIGMA[0][0]*SIGMA[1][1]))
+ +log(dtemp)-log(normc);
- if (give_log==0) density=exp(density);
- /*Rprintf("s11 %5g s22 %5g normc %5g dtemp %5g ldensity %5g\n", SIGMA[0][0],SIGMA[1][1],normc, dtemp, density);
- char ch;
- scanf(" %c", &ch );*/
+ if (give_log==0) density=exp(density);
+ /*Rprintf("s11 %5g s22 %5g normc %5g dtemp %5g ldensity %5g\n", SIGMA[0][0],SIGMA[1][1],normc, dtemp, density);
+ char ch;
+ scanf(" %c", &ch );*/
- Free(MEAN);
- FreeMatrix(SIGMA,dim);
+ Free(MEAN);
+ FreeMatrix(SIGMA,dim);
- return density;
+ return density;
}
- double invLogit(double x) {
- if (x>30) return 0;
- else return (1/(1+exp(-1*x)));
- }
+double invLogit(double x) {
+ if (x>30) return 0;
+ else return (1/(1+exp(-1*x)));
+}
- double logit(double x,char* emsg) {
- if (x>=1 || x<=0) {
- Rprintf(emsg);
- Rprintf(": %5g is out of logit range\n",x);
- }
- return log(x/(1-x));
- }
+double logit(double x,char* emsg) {
+ if (x>=1 || x<=0) {
+ Rprintf(emsg);
+ Rprintf(": %5g is out of logit range\n",x);
+ }
+ return log(x/(1-x));
+}
- int bit(int t, int n) {
- t=t>>n;
- return (t % 2);
- }
+int bit(int t, int n) {
+ t=t>>n;
+ return (t % 2);
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/r-cran-eco.git
More information about the debian-science-commits
mailing list