[r-cran-matchit] 06/45: Import Upstream version 2.1-3
Andreas Tille
tille at debian.org
Fri Oct 20 06:17:18 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-matchit.
commit fcb22a3cc702df14bc69c16397834daab7b2cd1f
Author: Andreas Tille <tille at debian.org>
Date: Fri Oct 20 07:40:51 2017 +0200
Import Upstream version 2.1-3
---
DESCRIPTION | 16 ++-
NAMESPACE | 32 ++---
R/diagnose.R | 82 -----------
R/discard.R | 30 ++++
R/distance.R | 136 ------------------
R/distance2GAM.R | 30 ++++
R/distance2glm.R | 50 +++++++
R/distance2mahalanobis.R | 9 ++
R/distance2nnet.R | 6 +
R/distance2rpart.R | 6 +
R/exactmatch.R | 25 ----
R/help.matchit.R | 30 ++--
R/jitter.pscore.R | 29 ++++
R/load.first.R | 14 ++
R/match.data.R | 36 ++++-
R/match.qoi.R | 64 +++++++++
R/matchdef.R | 220 -----------------------------
R/matchit.R | 217 +++++++----------------------
R/matchit.qqplot.R | 86 ++++++++++++
R/matchit2exact.R | 22 +++
R/matchit2full.R | 37 +++++
R/matchit2genetic.R | 47 +++++++
R/matchit2nearest.R | 246 +++++++++++++++++++++++++++++++++
R/matchit2optimal.R | 56 ++++++++
R/matchit2subclass.R | 90 ++++++++++++
R/neyman.R | 164 ----------------------
R/plot.matchit.R | 216 ++---------------------------
R/plot.matchit.subclass.R | 43 ++++++
R/print.matchit.R | 170 ++++-------------------
R/print.matchit.exact.R | 16 +++
R/print.matchit.full.R | 17 +++
R/print.matchit.subclass.R | 12 ++
R/print.neyman.R | 9 --
R/print.summary.matchit.R | 65 ++-------
R/print.summary.matchit.exact.R | 17 +++
R/print.summary.matchit.subclass.R | 20 +++
R/print.summary.neyman.R | 11 --
R/qqsum.R | 18 +++
R/subclassify.R | 124 -----------------
R/summary.matchit.R | 268 ++++++++----------------------------
R/summary.matchit.exact.R | 29 ++++
R/summary.matchit.full.R | 70 ++++++++++
R/summary.matchit.subclass.R | 110 +++++++++++++++
R/summary.neyman.R | 14 --
R/weights.matrix.R | 47 +++++++
R/weights.subclass.R | 43 ++++++
data/incomedata.tab | 201 ---------------------------
demo/00Index | 11 +-
demo/analysis.R | 187 ++++++++++++++-----------
demo/diagnose.R | 35 -----
demo/exact.R | 21 +++
demo/full.R | 24 ++++
demo/genetic.R | 18 +++
demo/lalonde.R | 112 ---------------
demo/match.data.R | 46 +++++++
demo/nearest.R | 101 ++++++++++++++
demo/optimal.R | 23 ++++
demo/subclass.R | 24 ++++
man/diagnose.Rd | 85 ------------
man/distance.Rd | 77 -----------
man/exactmatch.Rd | 59 --------
man/help.matchit.Rd | 6 +-
man/incomedata.Rd | 23 ----
man/match.data.Rd | 57 ++++++--
man/matchdef.Rd | 85 ------------
man/matchit.Rd | 274 ++++++++++++++++++++++---------------
man/neyman.Rd | 47 -------
man/plot.matchit.Rd | 39 ------
man/print.matchit.Rd | 36 -----
man/subclassify.Rd | 85 ------------
man/summary.matchit.Rd | 65 ---------
man/summary.neyman.Rd | 44 ------
72 files changed, 2078 insertions(+), 2806 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index b5f700d..75477d3 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,19 +1,21 @@
Package: MatchIt
-Version: 1.0-2
-Date: 2005-8-10
+Version: 2.1-3
+Date: 2005-10-05
Title: MatchIt: Nonparametric Preprocessing for Parametric Casual Inference
-Author: Daniel Ho <daniel.ho at yale.edu>,
+Author: Daniel Ho <daniel.e.ho at gmail.com>,
Kosuke Imai <kimai at Princeton.Edu>,
Gary King <king at harvard.edu>,
Elizabeth Stuart <stuart at stat.harvard.edu>
-Maintainer: Daniel Ho <daniel.ho at yale.edu>
-Depends: R (>= 2.0)
-Suggests: Zelig, optmatch
+Maintainer: Kosuke Imai <kimai at Princeton.Edu>
+Depends: R (>= 2.1), MASS, Zelig
+Suggests: optmatch, Matching
Description: MatchIt selects matched samples of the
the original treated and control groups with similar
covariate distributions -- can be used to match exactly
on covariates, to match on propensity scores, or perform
a variety of other matching procedures.
+LazyLoad: yes
+LazyData: yes
License: GPL version 2 or newer
URL: http://gking.harvard.edu/matchit
-Packaged: Wed Aug 10 14:48:40 2005; kimai
+Packaged: Thu Oct 6 21:05:56 2005; kimai
diff --git a/NAMESPACE b/NAMESPACE
index 7b19386..ed73030 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,25 +1,19 @@
-export(
- matchdef,
- match.data,
- matchit,
- diagnose,
- distance,
- subclassify,
- exactmatch,
- neyman,
- help.matchit,
- print.matchit,
- print.summary.matchit,
- print.summary.neyman,
- plot.matchit,
- summary.matchit,
- summary.neyman
- )
+import(MASS)
+importFrom(Zelig, user.prompt)
+export(matchit,
+ help.matchit,
+ match.data)
S3method(print, matchit)
+S3method(print, matchit.exact)
+S3method(print, matchit.subclass)
S3method(print, summary.matchit)
-S3method(print, summary.neyman)
+S3method(print, summary.matchit.exact)
+S3method(print, summary.matchit.subclass)
S3method(plot, matchit)
+S3method(plot, matchit.subclass)
S3method(summary, matchit)
-S3method(summary, neyman)
+S3method(summary, matchit.exact)
+S3method(summary, matchit.subclass)
+S3method(summary, matchit.full)
diff --git a/R/diagnose.R b/R/diagnose.R
deleted file mode 100644
index e268322..0000000
--- a/R/diagnose.R
+++ /dev/null
@@ -1,82 +0,0 @@
-diagnose <-function(formula, match.matrix, pscore, in.sample, data,
- exact = FALSE, mahvars = NULL, subclass = 0,
- psclass = NULL, nearest = TRUE, q.cut = NULL,
- counter = TRUE){
- if(counter){cat("Calculating summary statistics...")}
- treata <- model.frame(formula,data)[,1,drop=FALSE]
- treat <- as.vector(treata[,1])
- names(treat) <- row.names(treata)
- n <- length(treat)
- n0 <- length(treat[treat==0])
- n1 <- length(treat[treat==1])
-
- if(is.null(names(treat))){names(treat) <- seq(1,n)}
- labels <- names(treat)
- clabels <- labels[treat==0]
- tlabels <- labels[treat==1]
-
- # setting up weights
- if(nearest==TRUE){
- match.matrix <- match.matrix[in.sample[treat==1],,drop=F]
- num.matches <- dim(match.matrix)[2]-apply(as.matrix(match.matrix), 1, function(x) { sum(is.na(x)) })
- names(num.matches) <- tlabels[in.sample[treat==1]]
-
- t.units <- row.names(match.matrix)[num.matches>0]
- c.units <- na.omit(as.vector(as.matrix(match.matrix)))
- matched <-c(t.units,unique(c.units))
-
- weights <- rep(0,length(treat))
- names(weights) <- labels
- weights[t.units] <- 1
-
- for (cont in clabels) {
- treats <- na.omit(row.names(match.matrix)[cont==match.matrix[,1]])
- if (dim(match.matrix)[2]>1) {
- for (j in 2:dim(match.matrix)[2])
- treats <- c(na.omit(row.names(match.matrix)[cont==match.matrix[,j]]),treats)
- }
- weights[cont] <- length(unique(treats))
- }
-
- if (sum(weights[clabels])==0){
- weights[clabels] <- rep(0, length(weights[clabels]))
- } else {
- weights[clabels] <-length(weights[clabels][weights[clabels]>0])*weights[clabels]/sum(weights[clabels][weights[clabels]>0])
- }
-
- } else if(identical(exact,TRUE)) {
-
- weights <- rep(0,length(treat))
- names(weights) <- labels
- t.units <- names(psclass)[psclass>0 & treat==1]
- weights[t.units] <- 1
- for(j in 1:max(psclass)){
- qn0 <- sum(treat==0 & psclass==j)
- qn1 <- sum(treat==1 & psclass==j)
- weights[treat==0 & psclass==j] <- qn1/qn0
- }
- if (sum(weights[treat==0])==0) {
- weights[treat==0] <- rep(0, length(weights[clabels]))
- } else {
- weights[clabels] <- length(weights[treat==0][weights[treat==0]>0]) *
- weights[treat==0]/sum(weights[treat==0][weights[treat==0]>0])
- }
- matched <- names(treat)
-
- } else {
- matched <- names(treat)
- weights <- rep(1,length(treat))
- names(weights) <- names(treat)
- }
- weights[!in.sample] <- 0
- if (sum(weights)==0) {
- stop("Error: No units were matched")
- } else if (sum(weights[tlabels])==0){
- stop("No treated units were matched",call.=FALSE)
- } else if (sum(weights[clabels])==0){
- stop("No control units were matched",call.=FALSE)
- }
- cat("Done\n")
- list(weights=weights)
-}
-
diff --git a/R/discard.R b/R/discard.R
new file mode 100644
index 0000000..6188e53
--- /dev/null
+++ b/R/discard.R
@@ -0,0 +1,30 @@
+discard <- function(treat, pscore, option, X) {
+
+ n.obs <- length(treat)
+ pmax0 <- max(pscore[treat==0])
+ pmax1 <- max(pscore[treat==1])
+ pmin0 <- min(pscore[treat==0])
+ pmin1 <- min(pscore[treat==1])
+ if (option == "none") # keep all units
+ discarded <- rep(FALSE, n.obs)
+ else if (option == "both") # discard units outside of common support
+ discarded <- (pscore < max(pmin0, pmin1) | pscore > min(pmax0, pmax1))
+ else if (option == "control") # discard control units only
+ discarded <- (pscore < pmin1 | pscore > pmax1)
+ else if (option == "treat") # discard treated units only
+ discarded <- (pscore < pmin0 | pscore > pmax0)
+ else if (option == "convex.hull"){ # discard units not in T convex hull
+ if (!("whatif" %in% .packages(all = TRUE)))
+ install.packages("whatif", CRAN="http://gking.harvard.edu")
+ if (!("lpSolve" %in% .packages(all = TRUE)))
+ install.packages("lpSolve")
+ require(whatif)
+ require(lpSolve)
+ wif <- whatif(cfact = X[treat==0,], data = X[treat==1,])
+ discarded <- rep(FALSE, n.obs)
+ discarded[treat==0] <- !wif$in.hull
+ } else
+ stop("invalid input for `discard'")
+ names(discarded) <- names(treat)
+ return(discarded)
+}
diff --git a/R/distance.R b/R/distance.R
deleted file mode 100644
index adde38a..0000000
--- a/R/distance.R
+++ /dev/null
@@ -1,136 +0,0 @@
-distance <- function(formula, model="logit", data,
- discard=0,reestimate=FALSE,counter=TRUE,...){
- if(counter){cat("Calculating propensity score...")}
- mf<-match.call()
- if(!is.vector(discard) | !identical(round(length(discard)),1)){
- warning("discard=", mf$discard,
- " is invalid; used discard=0 instead", call.=FALSE)
- discard <- 0}
- if(!(identical(reestimate,TRUE) | identical(reestimate,FALSE))){
- warning("reestimate=",
- mf$reestimate," is invalid; used reestimate=FALSE instead",
- call.=FALSE)
- mf$reestimate=FALSE}
- mf$counter <- mf$discard<-mf$model<-mf$reestimate<-NULL
- dotsub <- eval(mf$subset, sys.frame(sys.parent()))
- a <- 0
- while(a<2){
- if(a==1)
- if(is.null(dotsub))
- mf$subset<-in.sample
- else{
- dotsub[dotsub] <- in.sample
- mf$subset <- dotsub
- }
- if(model=="logit" || model=="probit"){
- mf[[1]]<-as.name("glm")
- if(model=="logit"){mf$family<-as.name("binomial")} else
- {mf$family <- binomial(link="probit")} #is there a better way to define the link function here?
- res<-eval(as.call(mf), sys.frame(sys.parent()))
- if(a==0){
- pscore<-fitted(res)
- treat<-res$y
- covariates <- res$model[,-1,drop=F]
- }
- else{
- pscore[!in.sample] <- NA
- pscore[in.sample] <- fitted(res)
- }
- assign.model<-summary(res)
- }
- else if(model=="nnet"){
- require(nnet)
- mf[[1]] <- as.name("nnet.formula")
- if(is.null(mf$size)){mf$size <- 3} # default setting?
- res <- eval(as.call(mf),sys.frame(sys.parent()))
- if(a==0){
- pscore <- as.vector(fitted(res))
- nnames <- dimnames(fitted(res))[[1]]
- names(pscore) <- nnames
- if(is.null(mf$data)){
- treat <- model.frame(formula(res))[,1]
- covariates <- model.frame(delete.response(terms(formula(res))),drop=F)
- }else{
- treat <- model.frame(formula(res),data)[,1]
- covariates <- model.frame(delete.response(terms(formula(res))),data,drop=F)
- }
- if(!is.null(dotsub)){
- treat <- treat[dotsub]
- covariates <- covariates[dotsub,,drop=F]
- }
- names(treat) <- nnames
- assign.model <- summary(res)
- }
- else{
- pscore[!in.sample] <- NA
- pscore[in.sample] <- as.vector(fitted(res))
- assign.model <- res
- dan <- mf
- }
- }
- else if(model=="GAM"){
- require(mgcv)
- mf[[1]] <- as.name("gam")
- mf$family<-as.name("binomial")
- res <- eval(as.call(mf),sys.frame(sys.parent()))
- if(a==0){
- treat <- res$y
- names(treat) <- row.names(data) #assuming we have a data frame
- pscore <- fitted(res)
- names(pscore) <- names(treat)
- covariates <- res$model[,2:ncol(res$model),drop=F]
- }
- else{
- pscore[!in.sample] <- NA
- pscore[in.sample] <- fitted(res)
- }
- assign.model <- summary(res)
- }
- else if(model=="cart"){
- require(rpart)
- mf[[1]] <- as.name("rpart")
- res <- eval(as.call(mf),sys.frame(sys.parent()))
- if(a==0){
- pscore <- predict(res)
- treat <- res$y
- if(!is.null(dotsub)){
- covariates <- data[dotsub,attr(delete.response(terms(formula)),"term.labels"),drop=F]
- } else {covariates <- data[,attr(delete.response(terms(formula)),"term.labels"),drop=F]}
- }
- else{
- pscore[!in.sample] <- NA
- pscore[in.sample] <- predict(res)
- }
- assign.model <- print(res)
- }
- else {stop("Must specify valid model to estimate propensity score",call.=FALSE)}
- if(a==0){
- maxp0 <- max(pscore[treat==0])
- minp0 <- min(pscore[treat==0])
- maxp1 <- max(pscore[treat==1])
- minp1 <- min(pscore[treat==1])}
- if(discard==0){
- in.sample <- !is.na(treat) #just to make all TRUE
- a <- 2}
- else if(discard==1){
- if(a==0){in.sample <- (pscore>=max(minp1,minp0) & pscore<=min(maxp0,maxp1))}
- if(reestimate){(a <- a+1)}
- else {a <- 2}
- }
- else if(discard==2){
- if(a==0){in.sample <- (pscore>=minp1 & pscore<=maxp1)}
- if(reestimate){a <- a+1}
- else {a <- 2}
- }
- else if(discard==3){
- if(a==0){in.sample <- (pscore>=minp0 & pscore<=maxp0)}
- if(reestimate){a <- a+1}
- else {a <- 2}
- }
- else(stop("Invalid discard option",call.=FALSE))
- }
- ## return
- if(counter){cat("Done\n")}
- list(in.sample=in.sample, pscore=pscore, treat=treat, covariates=covariates,
- assign.model=assign.model)
-}
diff --git a/R/distance2GAM.R b/R/distance2GAM.R
new file mode 100644
index 0000000..862ba04
--- /dev/null
+++ b/R/distance2GAM.R
@@ -0,0 +1,30 @@
+distance2GAMlogit <- function(formula, data, ...) {
+ require(mgcv)
+ res <- gam(formula, data, family=binomial(logit), ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
+distance2GAMprobit <- function(formula, data, ...) {
+ require(mgcv)
+ res <- gam(formula, data, family=binomial(probit), ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
+distance2GAMcloglog <- function(formula, data, ...) {
+ require(mgcv)
+ res <- gam(formula, data, family=binomial(cloglog), ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
+distance2GAMlog <- function(formula, data, ...) {
+ require(mgcv)
+ res <- gam(formula, data, family=binomial(log), ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
+distance2GAMcauchit <- function(formula, data, ...) {
+ require(mgcv)
+ res <- gam(formula, data, family=binomial(cauchit), ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
diff --git a/R/distance2glm.R b/R/distance2glm.R
new file mode 100644
index 0000000..c810c54
--- /dev/null
+++ b/R/distance2glm.R
@@ -0,0 +1,50 @@
+distance2logit <- function(formula, data, ...) {
+ res <- glm(formula, data, family=binomial(logit), ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
+distance2linear.logit <- function(formula, data, ...) {
+ res <- glm(formula, data, family=binomial(logit), ...)
+ return(list(model = res, distance = predict(res)))
+}
+
+distance2probit <- function(formula, data, ...) {
+ res <- glm(formula, data, family=binomial(probit), ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
+distance2linear.probit <- function(formula, data, ...) {
+ res <- glm(formula, data, family=binomial(probit), ...)
+ return(list(model = res, distance = predict(res)))
+}
+
+distance2cloglog <- function(formula, data, ...) {
+ res <- glm(formula, data, family=binomial(cloglog), ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
+distance2linear.cloglog <- function(formula, data, ...) {
+ res <- glm(formula, data, family=binomial(cloglog), ...)
+ return(list(model = res, distance = predict(res)))
+}
+
+distance2log <- function(formula, data, ...) {
+ res <- glm(formula, data, family=binomial(log), ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
+distance2linear.log <- function(formula, data, ...) {
+ res <- glm(formula, data, family=binomial(log), ...)
+ return(list(model = res, distance = predict(res)))
+}
+
+distance2cauchit <- function(formula, data, ...) {
+ res <- glm(formula, data, family=binomial(cauchit), ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
+distance2linearcauchit <- function(formula, data, ...) {
+ res <- glm(formula, data, family=binomial(cauchit), ...)
+ return(list(model = res, distance = predict(res)))
+}
+
diff --git a/R/distance2mahalanobis.R b/R/distance2mahalanobis.R
new file mode 100644
index 0000000..3e20bc0
--- /dev/null
+++ b/R/distance2mahalanobis.R
@@ -0,0 +1,9 @@
+distance2mahalanobis <- function(formula, data, ...) {
+ X <- model.matrix(formula, data)
+ # Take intercept column out
+ X <- X[,-1]
+ Sigma <- var(X)
+ return(list(model = NULL,
+ distance = mahalanobis(X, colMeans(X), cov(X))))
+}
+
diff --git a/R/distance2nnet.R b/R/distance2nnet.R
new file mode 100644
index 0000000..e4d10ff
--- /dev/null
+++ b/R/distance2nnet.R
@@ -0,0 +1,6 @@
+distance2nnet <- function(formula, data, ...) {
+ require(nnet)
+ res <- nnet(formula, data, ...)
+ return(list(model = res, distance = fitted(res)))
+}
+
diff --git a/R/distance2rpart.R b/R/distance2rpart.R
new file mode 100644
index 0000000..091282d
--- /dev/null
+++ b/R/distance2rpart.R
@@ -0,0 +1,6 @@
+distance2rpart <- function(formula, data, ...) {
+ require(rpart)
+ res <- rpart(formula, data, ...)
+ return(list(model = res, distance = predict(res)))
+}
+
diff --git a/R/exactmatch.R b/R/exactmatch.R
deleted file mode 100644
index 75756dc..0000000
--- a/R/exactmatch.R
+++ /dev/null
@@ -1,25 +0,0 @@
-exactmatch <- function(formula,data,counter=TRUE) {
- if(counter){cat("Exact matching...")}
- data <- eval(data,parent.frame())
- treata <- model.frame(formula,data)[,1,drop=FALSE]
- treat <- as.vector(treata[,1])
- names(treat) <- row.names(treata)
- covariates <- model.frame(delete.response(terms(formula)),data)[,,drop=FALSE]
- n <- length(treat)
- n1 <- length(treat[treat==1])
- n0 <- length(treat[treat==0])
- covariates <- as.data.frame(covariates)
- xx <- apply(covariates, 1, function(x) paste(x, collapse = "\r"))
- xx1 <- xx[treat==1]
- xx0 <- xx[treat==0]
- cc <- unique(xx1)
- cc <- cc[cc%in%xx0]
- ncc <- length(cc)
- psclass <- rep(0,n)
- names(psclass) <- names(treat)
- for(i in 1:ncc){
- psclass[xx==cc[i]] <- i
- }
- if(counter){cat("Done\n")}
- list(psclass = psclass)
-}
diff --git a/R/help.matchit.R b/R/help.matchit.R
index 014f9e1..c618d4c 100644
--- a/R/help.matchit.R
+++ b/R/help.matchit.R
@@ -21,19 +21,23 @@ help.matchit <- function (object=NULL)
url <- c("http://gking.harvard.edu/matchit")
if (!is.null(object)) {
if (object == "matchit")
- url <- c("http://gking.harvard.edu/matchit/docs/Usage.html")
- if (object == "distance")
- url <- c("http://gking.harvard.edu/matchit/docs/_TT_distance_TT.html")
- if (object == "matchdef")
- url <- c("http://gking.harvard.edu/matchit/docs/_TT_matchdef_TT.html")
- if (object == "subclassify")
- url <- c("http://gking.harvard.edu/matchit/docs/_TT_subclassify_TT.html")
- if (object == "print.matchit")
- url <- c("http://gking.harvard.edu/matchit/docs/Print.html")
- if (object == "summary.matchit")
- url <- c("http://gking.harvard.edu/matchit/docs/Summary.html")
- if (object == "plot.matchit")
- url <- c("http://gking.harvard.edu/matchit/docs/Plot.html")
+ url <- c("http://gking.harvard.edu/matchit/docs/Reference_Manual.html")
+ if (object == "exact")
+ url <- c("http://gking.harvard.edu/matchit/docs/Exact_Matching2.html")
+ if (object == "subclass")
+ url <- c("http://gking.harvard.edu/matchit/docs/Subclassification2.html")
+ if (object == "nearest")
+ url <- c("http://gking.harvard.edu/matchit/docs/Nearest_Neighbor_Match2.html")
+ if (object == "optimal")
+ url <- c("http://gking.harvard.edu/matchit/docs/Optimal_Matching2.html")
+ if (object == "full")
+ url <- c("http://gking.harvard.edu/matchit/docs/Full_Matching2.html")
+ if (object == "match.data")
+ url <- c("http://gking.harvard.edu/matchit/docs/_TT_match_data_TT.html")
+ if (object == "summary")
+ url <- c("http://gking.harvard.edu/matchit/docs/_TT_summary_TT.html")
+ if (object == "plot")
+ url <- c("http://gking.harvard.edu/matchit/docs/_TT_plot_TT.html")
}
if (is.null(url)) {
diff --git a/R/jitter.pscore.R b/R/jitter.pscore.R
new file mode 100644
index 0000000..60ffc2b
--- /dev/null
+++ b/R/jitter.pscore.R
@@ -0,0 +1,29 @@
+jitter.pscore <- function(x, interactive){
+ treat <- x$treat
+ pscore <- x$distance
+ weights <- x$weights
+ matched <- weights!=0
+ q.cut <- x$q.cut
+ jitp <- jitter(rep(1,length(treat)),factor=6)-(treat==0)
+ cwt <- sqrt(weights)
+ plot(pscore,xlim=range(na.omit(pscore)),ylim=c(-1,2),
+ type="n",ylab="",xlab="Propensity Score",
+ axes=F,main="Distribution of Propensity Scores")
+ if(!is.null(q.cut)){abline(v=q.cut,col="grey",lty=1)}
+ points(pscore[treat==1&weights!=0],jitp[treat==1&weights!=0],
+ pch=18,cex=cwt[treat==1&weights!=0])
+ points(pscore[treat==1&weights==0],jitp[treat==1&weights==0],
+ pch=5,col="grey",cex=0.5)
+ points(pscore[treat==0&weights!=0],jitp[treat==0&weights!=0],
+ pch=18,cex=cwt[treat==0&weights!=0])
+ points(pscore[treat==0&weights==0],jitp[treat==0&weights==0],
+ pch=5,col="grey",cex=0.5)
+ axis(1)
+ text(sum(range(na.omit(pscore)))/2,1.5,"Treatment Units")
+ text(sum(range(na.omit(pscore)))/2,-0.5,"Control Units")
+ box()
+ if(interactive==TRUE) {
+ print("To identify the units, use first mouse button; to stop, use second.")
+ identify(pscore,jitp,names(treat))
+ }
+}
diff --git a/R/load.first.R b/R/load.first.R
new file mode 100644
index 0000000..7e792e2
--- /dev/null
+++ b/R/load.first.R
@@ -0,0 +1,14 @@
+.onAttach <- function(...) {
+ mylib <- dirname(system.file(package = "MatchIt"))
+ ver <- packageDescription("MatchIt", lib = mylib)$Version
+ builddate <- packageDescription("MatchIt", lib = mylib)$Date
+ cat(paste("## \n## MatchIt (Version ", ver, ", built: ", builddate, ")\n", sep = ""))
+ cat("## Please refer to http://gking.harvard.edu/matchit for full documentation \n",
+ "## or help.matchit() for help with commands supported by MatchIt.\n##\n",
+ sep="")
+ if(!any(search()=="package:MASS"))
+ require(MASS)
+ if(!any(search()=="package:Zelig"))
+ require(Zelig)
+ options(digits = 4)
+}
diff --git a/R/match.data.R b/R/match.data.R
index e5a6689..2228657 100644
--- a/R/match.data.R
+++ b/R/match.data.R
@@ -1,12 +1,36 @@
-match.data <- function(object, group = "all") {
- data <- object$data
- treat <- eval(object$treat, data)
+match.data <- function(object, group = "all", distance = "distance",
+ weights = "weights", subclass = "subclass") {
+ data <- eval(object$call$data)
+ treat <- object$treat
+ wt <- object$weights
+ vars <- names(data)
+ if (distance %in% vars)
+ stop("invalid input for distance. choose a different name.")
+ else if (!is.null(object$distance)) {
+ dta <- data.frame(cbind(data, object$distance))
+ names(dta) <- c(names(data), distance)
+ data <- dta
+ }
+ if (weights %in% vars)
+ stop("invalid input for weights. choose a different name.")
+ else if (!is.null(object$weights)){
+ dta <- data.frame(cbind(data, object$weights))
+ names(dta) <- c(names(data), weights)
+ data <- dta
+ }
+ if (subclass %in% vars)
+ stop("invalid input for subclass. choose a different name.")
+ else if (!is.null(object$subclass)){
+ dta <- data.frame(cbind(data, object$subclass))
+ names(dta) <- c(names(data), subclass)
+ data <- dta
+ }
if (group == "all")
- return(data[object$matched,])
+ return(data[wt > 0,])
else if (group == "treat")
- return(data[object$matched & treat == 1,])
+ return(data[wt > 0 & treat == 1,])
else if (group == "control")
- return(data[object$matched & treat == 0,])
+ return(data[wt > 0 & treat == 0,])
else
stop("error: invalid input for group.")
}
diff --git a/R/match.qoi.R b/R/match.qoi.R
new file mode 100644
index 0000000..07ce657
--- /dev/null
+++ b/R/match.qoi.R
@@ -0,0 +1,64 @@
+## Function to calculate summary stats
+qoi <- function(xx,tt,ww, t.plot=NULL, c.plot=NULL, sds=NULL){
+ weighted.var <- function(x, w) {
+ sum(w * (x - weighted.mean(x,w))^2)/(sum(w) - 1)}
+ xsum <- matrix(NA,2,7)
+ xsum <- as.data.frame(xsum)
+ row.names(xsum) <- c("Full","Matched")
+ names(xsum) <- c("Means Treated","Means Control","Treated SD","Std. Bias", "QQ Med",
+ "QQ Mean", "QQ Max")
+ x1 <- xx[tt==1]
+ x0 <- xx[tt==0]
+ ww1 <- ww[tt==1]
+ ww0 <- ww[tt==0]
+ xsum[1,1] <- mean(x1,na.rm=T)
+ xsum[1,2] <- mean(x0,na.rm=T)
+ X.t.m <- xx[tt==1][ww1>0]
+ X.c.m <- xx[tt==0][ww0>0]
+ xsum[2,1] <- weighted.mean(X.t.m, ww1[ww1>0])
+ xsum[2,2] <- weighted.mean(X.c.m, ww0[ww0>0])
+ if(!(sum(tt==1)<2|(sum(tt==0)<2))){
+ xsum[1,3] <- sd(x1,na.rm=T)
+ qqall <- qqsum(x1,x0)
+ xsum[1,5:7] <- c(qqall$meddiff,qqall$meandiff,qqall$maxdiff)
+ #xsum[1,7] <- (mean(x1,na.rm=T)-mean(x0,na.rm=T))/sd(x1,na.rm=T)
+ if (!is.null(sds)) xsum[1,4] <- (mean(x1,na.rm=T)-mean(x0,na.rm=T))/sds
+ else xsum[1,4] <- (mean(x1,na.rm=T)-mean(x0,na.rm=T))/xsum[1,3]
+ xsum[2,3] <- sqrt(weighted.var(x1[ww1>0],ww1[ww1>0]))
+ if(!is.null(t.plot)) qqmat <- qqsum(xx[t.plot],xx[c.plot])
+ else qqmat <- qqsum(x1[ww1>0],x0[ww0>0])
+ xsum[2,5:7] <- c(qqmat$meddiff,qqmat$meandiff,qqmat$maxdiff)
+ #xsum[2,7] <- (xsum[2,1]-xsum[2,2])/sqrt(weighted.var(X.t.m,ww1[ww1>0]))
+ if (!is.null(sds)) xsum[2,4] <- (xsum[2,1]-xsum[2,2])/sds
+ xsum[2,4] <- (xsum[2,1]-xsum[2,2])/xsum[1,3]
+ }
+ xsum
+}
+
+## By subclass
+qoi.by.sub <- function(xx,tt,ww,qq){
+ qbins <- max(qq,na.rm=TRUE)
+ q.table <- matrix(0,7,qbins)
+ qn <- matrix(0,3,qbins)
+ matched <- ww!=0
+ for (i in 1:qbins)
+ {
+ qi <- qq[matched]==i & (!is.na(qq[matched]))
+ qx <- xx[matched][qi]
+ qt <- tt[matched][qi]
+ qw <- as.numeric(ww[matched][qi]!=0)
+ if(sum(qt==1)<2|(sum(qt==0)<2)){
+ if(sum(qt==1)<2){warning("Not enough treatment units in subclass ",i,call.=FALSE)}
+ else if(sum(qt==0)<2){warning("Not enough control units in subclass ",i,call.=FALSE)}
+ }
+ qoi.i <- qoi(qx,qt,qw, sds=sd(xx[tt==1],na.rm=T))
+ q.table[,i] <- as.numeric(qoi.i[1,])
+ qn[,i] <- c(sum(qt),sum(qt==0),length(qt))
+ }
+ q.table <- as.data.frame(q.table)
+ qn <- as.data.frame(qn)
+ names(q.table) <- names(qn) <- paste("Subclass",1:qbins)
+ row.names(q.table) <- names(qoi.i)
+ row.names(qn) <- c("Treated","Control","Total")
+ list(q.table=q.table,qn=qn)
+}
diff --git a/R/matchdef.R b/R/matchdef.R
deleted file mode 100644
index a0cc5aa..0000000
--- a/R/matchdef.R
+++ /dev/null
@@ -1,220 +0,0 @@
-matchdef <- function(formula, in.sample, pscore, nearest = TRUE,
- replace = FALSE, m.order = 2, ratio = 1,
- caliper = 0, calclosest = FALSE, mahvars = NULL,
- exact = FALSE, data = NULL, counter = TRUE, ...){
- treata <- model.frame(formula,data)[,1,drop=FALSE]
- treat <- as.vector(treata[,1])
- names(treat) <- row.names(treata)
-
- covariates <- model.frame(delete.response(terms(formula)),data)[,,drop=FALSE]
-
- ## Total number of units
- n <- length(treat)
- n1 <- length(treat[treat==1])
- n0 <- length(treat[treat==0])
-
- p1 <- pscore[treat==1]
- p0 <- pscore[treat==0]
-
- ## Generating separate indices for treated and control units
- if(is.null(names(treat))){names(treat) <- seq(1,n)}
- labels <- names(treat)
- clabels <- labels[treat==0]
- tlabels <- labels[treat==1]
-
- ## Generating match matrix
- match.matrix <- as.data.frame(matrix(0,n1,ratio), row.names = tlabels)
-
- ## Vectors of whether unit has been matched:
- ## = 0 if not matched (unit # of match if matched)
- ## = -1 if can't be matched (if in.sample=0)
- matchedc <- rep(0,length(p0))
- names(matchedc) <- clabels
-
- ## optimal ratio matching
- if (m.order==1) {
- check <- library()
- if(any(check$results[,"Package"] == "optmatch"))
- require(optmatch)
- else
- stop("Please install optmatch using \n install.packages(\"optmatch\", contriburl= \"http://www.stat.lsa.umich.edu/~bbh/optmatch\")")
- cat("Optimal Matching Treated: ")
- distance <- matrix(0, ncol=n0, nrow=n1)
- rownames(distance) <- row.names(treata)[treat==1]
- colnames(distance) <- row.names(treata)[treat==0]
- for (i in 1:n1)
- distance[i,] <- abs(p1[i]-p0)
- full <- as.matrix(fullmatch(distance, min.controls = ratio,
- max.controls = ratio,
- omit.fraction = (n0-ratio*n1)/n0,...))
- psclass <- full[pmatch(row.names(treata), row.names(full)),]
- psclass <- as.numeric(as.factor(psclass))
- names(psclass) <- row.names(treata)
- for (i in 1:n1) {
- match.matrix[i,] <-
- names(which(psclass[tlabels[i]]==
- psclass[-pmatch(tlabels[i],
- names(psclass))]))
- }
- if(counter==TRUE){cat("Done\n")}
-
- } else if(nearest) {
- ## These are the units that are ineligible because of discard
- ## (in.sample==0)
- matchedc[in.sample[clabels]==0] <- -1
- match.matrix[in.sample[tlabels]==0,] <- -1
- matchedt <- match.matrix[,1]
- names(matchedt) <- tlabels
-
- ## total number of matches (including ratios) = ratio * n1
- tr <- length(match.matrix[match.matrix!=-1])
- r <- 1
-
- ## Caliper for matching (=0 if caliper matching not done)
- sd.cal <- caliper*sqrt(var(pscore[in.sample==1]))
-
- ## Var-covar matrix for Mahalanobis (currently set for full sample)
- if (!is.null(mahvars)) {
- if(!sum(mahvars%in%names(data))==length(mahvars))
- stop("Mahvars not contained in data",call.=FALSE)
- mahvars <- as.matrix(data[,mahvars])
- row.names(mahvars) <- labels
- Sigma <- var(mahvars)
- }
-
- ## Now for exact matching
- ## If exact=T, set exact to covariates from distance
- ## Make new variable that is T/F for exact matching, then exact will have the vars if
- ## exactmatch=T
- exactmatch <- exact
- if (!is.logical(exact)) {
- exactmatch <- TRUE} else if (identical(exact,TRUE)) {exact <- covariates}
-
- if (exactmatch==TRUE){
- if(!sum(exact%in%names(data))==length(exact))
- stop("Exact variables not contained in data",call.=FALSE)
- exact <- as.matrix(data[,exact])
- row.names(exact) <- labels
- }
- ## Looping through nearest neighbour matching for all treatment units
- ## Only do matching for units with in.sample==1 (matched!=-1)
- if(counter==TRUE){
- trseq <- floor(seq(tr/10,tr,tr/10))
- cat("Matching Treated: ")
- }
-
- for(i in 1:tr){
- ## Make new matchedc column to be used for exact matching
- ## Will only be 0 (eligible for matching) if it's an exact match
- if(counter==TRUE) {if(i%in%trseq){cat(10*which(trseq==i),"%...",sep="")}} # a counter
- matchedc2 <- matchedc
- ##in cases there's no replacement and all controls have been used up
- if(!0%in%matchedc2){
- match.matrix[match.matrix[,r]==0 & !is.na(match.matrix[,r]),r] <- NA
- if(r<ratio){match.matrix[,(r+1):ratio] <- NA}
- break
- }
-
- ##in case there's replacement, but all units have been used in
- ##previous ratios
- if(sum(!is.na(match.matrix[,r]))==0){
- if(r<ratio){match.matrix[,(r+1):ratio] <- NA}
- break
- }
-
- ## Which ratio we're on
- if(r!=ceiling(i/(tr/ratio))) {r <- r+1; matchedt <- match.matrix[,r]}
-
- if(m.order==2) {iterp1 <- max(p1[matchedt==0],na.rm=T)}
- if(m.order==3) {iterp1 <- min(p1[matchedt==0],na.rm=T)}
- if(m.order==4) {iterp1 <- sample(p1[matchedt==0][!is.na(p1[matchedt==0])],1)}
-
- ## The treatment unit for this iteration, again resolving ties randomly
- itert <- as.vector(na.omit(tlabels[iterp1==p1 & matchedt==0]))
- if(length(itert)>1){itert <- sample(itert,1)}
-
- ## Calculating all the absolute deviations in propensity scores
- ## Calculate only for those eligible to be matched (matchedc==0)
- ## this first if statement only applies to replacement ratio
- ## matching, so that each treatment unit is matched to a different
- ## control unit than from the previous round
-
- ## match number = NA if no units within caliper
-
- ## Set things up for exact matching
- ## Make matchedc2==-2 if it isn't an exact match
- ## There might be a more efficient way to do this, but I couldn't figure
- ## out another way to compare a vector with the matrix
- if (exactmatch==TRUE) {
- for (k in 1:dim(exact)[2]) matchedc2[exact[itert,k]!=exact[clabels,k]] <- -2
- }
-
- ## Need to add a check in case there aren't any eligible matches left...
- if(replace==TRUE & r!=1) {
- if (sum(!clabels%in%match.matrix[itert,(1:r-1)] & matchedc2==0)==0) {
- deviation <- NULL
- mindev <- NA
- }
- else
- deviation <- abs(p0[!clabels%in%match.matrix[itert,(1:r-1)] & matchedc2==0]-iterp1)
- } else {
- if (sum(matchedc2==0)==0) {
- deviation <- NULL
- mindev <- NA
- }
- else deviation <- abs(p0[matchedc2==0]-iterp1)
- }
-
- if (caliper!=0 & (!is.null(deviation))) {
- if(replace==TRUE & r!=1)
- pool <- clabels[!clabels%in%match.matrix[itert,(1:r-1)]
- & matchedc2==0][deviation <= sd.cal]
- else
- pool <- clabels[matchedc2==0][deviation <= sd.cal]
- if(length(pool)==0) {
- if (calclosest==FALSE) mindev <- NA
- else {
- if (replace==TRUE & r!= 1){
- mindev <- clabels[!clabels%in%match.matrix[itert,(1:r-1)]][min(deviation)==deviation]
- } else{mindev <- clabels[matchedc2==0][min(deviation)==deviation]}
- }
- }
- else if (length(pool)==1) mindev <- pool[1]
- else if (is.null(mahvars)) mindev <- sample(pool, 1)
- else {
- ## This has the important vars for the C's within the caliper
- poolvarsC <- mahvars[pool,,drop=F]
- ## Sigma is the full group var/covar matrix of Mahalvars
- mahal <- mahalanobis(poolvarsC, mahvars[itert,],Sigma)
- mindev <- pool[mahal==min(mahal)]
- }
- } else if(!is.null(deviation)) {
- if (replace==TRUE & r!=1){
- mindev <- clabels[!clabels%in%match.matrix[itert,(1:r-1)] & matchedc2==0][min(deviation)==deviation]
- } else {mindev <- clabels[matchedc2==0][min(deviation)==deviation]}
- }
-
- ## Resolving ties in minimum deviation by random draw
- if(length(mindev)>1){goodmatch <- sample(mindev,1)} else goodmatch <- mindev
-
- ## Storing which treatment unit has been matched to control, and
- ## vice versa
- matchedt[itert==tlabels] <- goodmatch
- matchedc[goodmatch==clabels] <- itert
-
- ## instead of the in.sample, we now have an index with dimensions n1 by # of
- ## matches (ratio)
- match.matrix[which(itert==tlabels),r] <- goodmatch
-
- ## If matching with replacement, set matchedc back to 0 so it can be reused
- if (replace==TRUE) matchedc[goodmatch==clabels] <- 0
-
- }
- if(counter==TRUE){cat("Done\n")}
- }
- if(!nearest){match.matrix <- NULL} else {
- x <- as.matrix(match.matrix)
- x[x==-1] <- NA
- match.matrix <- as.data.frame(x)}
- list(match.matrix = match.matrix, in.sample = in.sample)
-}
diff --git a/R/matchit.R b/R/matchit.R
index faf5900..07d305b 100644
--- a/R/matchit.R
+++ b/R/matchit.R
@@ -1,174 +1,61 @@
-matchit <- function(formula, model="logit", data, discard=0,
- reestimate=FALSE, nearest=TRUE, replace=FALSE,
- m.order=2, ratio=1, caliper=0, calclosest=FALSE,
- subclass=0, sub.by="treat", mahvars=NULL, exact=FALSE,
- counter=TRUE, full = FALSE, full.options=list(), ...){
- cl <- match.call()
- if (m.order==1 | full)
- require(optmatch)
-
+matchit <- function(formula, data, method = "nearest", distance = "logit",
+ distance.options=list(), discard = "none",
+ reestimate = FALSE, ...) {
+
#Checking input format
#data input
- is.null(data) #there must be a better way to get a warning for data input
+ if(is.null(data)) stop("Dataframe must be specified",call.=FALSE)
if(!is.data.frame(data)){
- stop("Data ", cl$data, " must be a dataframe",call.=FALSE)}
- #check pscore / psweights names in dataframe
- if("pscore"%in%names(data)){
- stop("Dataframe contains the variable 'pscore'. Please change this name.",call. = FALSE)
- }
- if("psweights"%in%names(data)){
- stop("Dataframe contains the variable 'psweights'. Please change this name.",call.=FALSE)
- }
- if("psclass"%in%names(data)){
- stop("Dataframe contains the variable 'psclass'. Please change this name.", call.=FALSE)
- }
-
- #sub.by
- if(!is.vector(sub.by)|length(sub.by)!=1){
- warning(cl$sub.by," is not a valid sub.by option; sub.by=\"treat\" used instead",call.=FALSE); sub.by <- "treat"}
- if(!sub.by%in%c("treat","control","all")){
- warning(cl$sub.by, " is not a valid sub.by option; sub.by=\"treat\" used instead",call.=FALSE); sub.by <- "treat"}
- #subclass
- if(length(subclass)==1){
- if(subclass<0 | subclass==1){
- warning(cl$subclass, " is not a valid subclass; subclass=0 used instead",call.=FALSE); subclass <- 0}
- } else {
- if(!is.vector(subclass)){
- warning(cl$subclass, " is not a valid subclass; subclass=0 used instead",call.=FALSE); subclass <- 0}
- if(sum(subclass<=1 & subclass>=0)!=length(subclass)){
- warning("Subclass ", cl$subclass, " is not bounded by 0 and 1; subclass=0 used instead",
- call.=FALSE); subclass <- 0}
- }
- #nearest
- if(!(identical(nearest,TRUE) | identical(nearest,FALSE))){
- warning("nearest=",cl$nearest," is invalid; used nearest=TRUE instead",call.=FALSE);nearest=TRUE}
- #replace
- if(!(identical(replace,TRUE) | identical(replace,FALSE))){
- warning("replace=",cl$replace," is invalid; used replace=FALSE instead",call.=FALSE);replace=0}
- #m.order
- if(!(identical(m.order,2) | identical(m.order,3) |
- identical(m.order,4) | identical(m.order,1))){
- warning("m.order=",cl$m.order," is invalid; used m.order=2 instead",call.=FALSE);m.order=2}
- #ratio
- ratio <- round(ratio)
- if(!is.numeric(ratio) | ratio[1]<1 | !identical(round(length(ratio)),1)){
- warning("ratio=",cl$ratio," is invalid; used ratio=1 instead",call.=FALSE);ratio=1}
- #caliper
- if(!is.vector(caliper) | !identical(round(length(caliper)),1)){
- warning("caliper=",cl$caliper," is invalid; used caliper=0 instead",call.=FALSE);caliper=0}
- if(caliper<0){
- warning("caliper=",cl$caliper," is less than 0; used caliper=0 instead",call.=FALSE);caliper=0}
- #calclosest
- if(!(identical(calclosest,TRUE)| identical(calclosest,FALSE))){
- warning("calclosest=",cl$calclosest," is invalid; used calclosest=FALSE instead",call.=FALSE)
- calclosest=FALSE}
- #mahvars & caliper
- if (!is.null(mahvars) & caliper[1]==0){
- warning("Must specify caliper > 0 to use Mahalanobis matching. Mahalanobis matching not done",call. = FALSE)}
- #counter
- if(!(identical(counter,TRUE)| identical(counter,FALSE))){
- warning("counter=",cl$counter," is invalid; used counter=TRUE instead",call.=FALSE);counter=TRUE}
- #full matching
- if(!(identical(full,TRUE) | identical(full,FALSE))){
- warning("full=",cl$full,") is invalid; used full=FALSE instead",call. = FALSE)
- full <- FALSE
- } else if(full & nearest){
- warning("Full matching will ignore nearest neighbor inputs",call. = FALSE)
- nearest <- F
- }
+ stop("Data must be a dataframe",call.=FALSE)}
- # Set up for output
- tt <- terms(formula(cl))
-
- if (identical(TRUE, exact)) {
- if(!is.null(c(cl$discard, cl$reestimate, cl$model, cl$nearest, cl$replace, cl$m.order, cl$ratio,
- cl$caliper, cl$calclosest, cl$subclass, cl$sub.by, cl$mahvars)))
- warning("exact=TRUE chosen; all other matching options ignored")
- data <- eval(data,parent.frame())
- mf <- match.call()
- dotsub <- eval(mf$subset,data)
- if(!is.null(dotsub)) {
- data <- data[dotsub,]
- }
-
- treata <- model.frame(formula,data)[,1,drop=FALSE]
- treat <- as.vector(treata[,1])
- names(treat) <- row.names(treata)
- covariates <- model.frame(delete.response(terms(formula)),data)[,,drop=FALSE]
-
- b <- exactmatch(formula, data, counter=counter)
-
- dummy.pscore <- rep(0.5, dim(covariates)[1])
- dummy.in.sample <- rep(TRUE, dim(covariates)[1])
- names(dummy.pscore) <- names(treat)
- names(dummy.in.sample) <- names(treat)
- c <- diagnose(formula, match.matrix=NULL, pscore=dummy.pscore,
- in.sample = dummy.in.sample, data=data,exact=TRUE,
- mahvars=NULL, subclass=max(b$psclass),
- psclass=b$psclass, nearest=FALSE, counter=counter)
-
- psweights <- c$weights
- pscore <- dummy.pscore
-
- if (is.null(b$psclass)) psclass <- rep(1, length(pscore))
- else psclass <- b$psclass
-
- data <- cbind(data,psweights,pscore, psclass)
-
- z <- list(formula=formula, psweights=c$weights,
- treat=attr(terms(formula(cl)),"variables")[[2]],
- in.sample=NULL, match.matrix=b$match.matrix,
- covariates=attr(delete.response(tt),"term.labels"), psclass=b$psclass,
- q.cut=NULL, assign.model=NULL, call=cl, matched=(c$weights!=0),
- data= data)
- class(z) <- "matchit"
- return(z)
- }
+ ## check inputs
+ fn1 <- paste("distance2", distance, sep = "")
+ if (!exists(fn1))
+ stop(distance, "not supported.")
+ fn2 <- paste("matchit2", method, sep = "")
+ if (!exists(fn2))
+ stop(method, "not supported.")
+
+ ## obtain T and X
+ tt <- terms(formula)
+ attr(tt, "intercept") <- 0
+ mf <- model.frame(tt, data)
+ treat <- model.response(mf)
+ X <- model.matrix(tt, data=mf)
+
+ ## estimate the distance measure
+ if (method == "exact") {
+ distance <- out1 <- discarded <- NULL
+ if (!is.null(distance))
+ warning("distance is set to `NULL' when exact matching is used.")
+ }
else {
- mf <- match.call()
- dotsub <- eval(mf$subset,data)
- mf[[1]] <- as.name("distance")
- mf$nearest <- mf$replace <- mf$m.order <- mf$ratio <- mf$caliper <-
- mf$calclosest <- mf$subclass <- mf$sub.by <- mf$mahvars <-
- mf$exact <- mf$full <- mf$full.options <- NULL
- a <- eval(as.call(mf), sys.frame(sys.parent()))
- if(!is.null(dotsub)) {
- data <- data[dotsub,]
+ if (is.null(distance.options$formula))
+ distance.options$formula <- formula
+ if (is.null(distance.options$data))
+ distance.options$data <- data
+ out1 <- do.call(fn1, distance.options)
+ discarded <- discard(treat, out1$distance, discard, X)
+ if (reestimate) {
+ distance.options$data <- data[!discarded,]
+ out1 <- do.call(fn1, distance.options)
}
+ distance <- out1$distance
+ }
- b <- matchdef(formula, a$in.sample, a$pscore, nearest = nearest,
- replace = replace, m.order = m.order, ratio = ratio,
- caliper = caliper, calclosest = calclosest,
- mahvars = mahvars,data = data, exact = exact,
- counter = counter)
-
- b1 <- subclassify(formula, data, a$in.sample, a$pscore,
- nearest = nearest, b$match.matrix,
- subclass = subclass, sub.by = sub.by, counter =
- counter, full = full, full.options=full.options)
-
- c <- diagnose(formula, b$match.matrix, a$pscore, a$in.sample,
- data=data, exact=exact, mahvars=mahvars,
- subclass=subclass, psclass=b1$psclass,nearest=nearest,
- q.cut=b1$q.cut, counter=counter)
-
- ##adding pscore and weights to dataframe
- pscore <- a$pscore
- psweights <- c$weights
-
- # Create psclass with subclasses
- if (is.null(b1$psclass)) psclass <- rep(1, length(pscore))
- else psclass <- b1$psclass
-
- data <- cbind(data,psweights,pscore, psclass)
-
- z <- list(formula=formula,psweights=psweights,
- treat=attr(terms(formula(cl)),"variables")[[2]],
- in.sample=a$in.sample, match.matrix=b$match.matrix,
- covariates=attr(delete.response(tt),"term.labels"),
- psclass=b1$psclass, q.cut=b1$q.cut, assign.model=a$assign.model,
- call=cl,matched=(c$weights!=0), data=data)
- class(z) <- "matchit"
- return(z)
+ ## matching!
+ out2 <- do.call(fn2, list(treat, X, data, distance=distance, discarded, ...))
+
+ ## putting all the results together
+ out2$call <- match.call()
+ out2$model <- out1$model
+ out2$formula <- formula
+ out2$treat <- treat
+ if (is.null(out2$X)){
+ out2$X <- X
}
+ out2$distance <- distance
+ out2$discarded <- discarded
+
+ return(out2)
}
diff --git a/R/matchit.qqplot.R b/R/matchit.qqplot.R
new file mode 100644
index 0000000..2a0009a
--- /dev/null
+++ b/R/matchit.qqplot.R
@@ -0,0 +1,86 @@
+matchit.qqplot <- function(x,discrete.cutoff,
+ which.subclass=NULL, numdraws=5000,
+ interactive = T, which.xs = NULL){
+ covariates <- x$X
+ if(!is.null(which.xs)){
+ if(sum(which.xs%in%dimnames(covariates)[[2]])!=length(which.xs)){
+ stop("which.xs is incorrectly specified")
+ }
+ covariates <- covariates[,which.xs,drop=F]
+ }
+ treat <- x$treat
+ matched <- x$weights!=0
+
+ ## For full matching, sample numdraws observations using the weights
+ if(x$call$method=="full") {
+ t.plot <- sample(names(treat)[treat==1], numdraws/2, replace=TRUE, prob=x$weights[treat==1])
+ c.plot <- sample(names(treat)[treat==0], numdraws/2, replace=TRUE, prob=x$weights[treat==0])
+
+ m.covariates <- x$X[c(t.plot, c.plot),]
+ m.treat <- x$treat[c(t.plot, c.plot)]
+ } else {
+ m.covariates <- covariates[matched,,drop=F]
+ m.treat <- treat[matched]
+ }
+
+ if(!is.null(which.subclass)){
+ subclass <- x$subclass
+ sub.index <- subclass==which.subclass & !is.na(subclass)
+ sub.covariates <- covariates[sub.index,,drop=F]
+ sub.treat <- treat[sub.index]
+ sub.matched <- matched[sub.index]
+ ## Matched units in each subclass
+ m.covariates <- sub.covariates[sub.matched,,drop=F]
+ m.treat <- sub.treat[sub.matched]
+ ## Compare to full sample--reset covariates and treat to full data set
+# covariates <- x$X
+# treat <- x$treat
+ }
+ nn <- dimnames(covariates)[[2]]
+ nc <- length(nn)
+ covariates <- data.matrix(covariates)
+ oma <- c(4, 4, 6, 4)
+ if(interactive){
+ opar <- par(mfrow = c(3, 3), mar = rep.int(1/2, 4), oma = oma, ask=T)
+ } else {
+ opar <- par(mfrow = c(3, 3), mar = rep.int(1/2, 4), oma = oma, ask=F)
+ }
+ on.exit(par(opar))
+ for (i in 1:nc){
+ xi <- covariates[,i]
+ m.xi <- m.covariates[,i]
+ ni <- nn[i]
+ plot(xi,type="n",axes=F)
+ if(((i-1)%%3)==0){
+ htext <- "QQ Plots"
+ if(!is.null(which.subclass)){
+ htext <- paste(htext,paste(" (Subclass ",which.subclass,")",sep=""),sep="")
+ }
+ mtext(htext, 3, 3, TRUE, 0.5, cex=1.2,font=2)
+ mtext("All", 3, 1, TRUE, 0.5, cex=1.2,font = 1)
+ mtext("Matched", 3, 1, TRUE, 0.83, cex=1.2,font = 1)
+ mtext("Control Units", 1, 0, TRUE, 2/3, cex=1,font = 1)
+ mtext("Treated Units", 4, 0, TRUE, 0.5, cex=1,font = 1)
+ }
+ par(usr = c(0, 1, 0, 1))
+ l.wid <- strwidth(nn, "user")
+ cex.labels <- max(0.8, min(2, 0.9/max(l.wid)))
+ text(0.5,0.5,ni,cex=cex.labels)
+ if(length(table(xi))<=discrete.cutoff){
+ xi <- jitter(xi)
+ m.xi <- jitter(m.xi)
+ }
+ rr <- range(xi)
+ qqplot(xi[treat==0],xi[treat==1], xlim=rr,ylim=rr,axes=F,ylab="",xlab="")
+ abline(a=0,b=1)
+ abline(a=(rr[2]-rr[1])*0.1,b=1,lty=2)
+ abline(a=-(rr[2]-rr[1])*0.1,b=1,lty=2)
+ axis(2)
+ box()
+ qqplot(m.xi[m.treat==0],m.xi[m.treat==1],xlim=rr,ylim=rr,axes=F,ylab="",xlab="")
+ abline(a=0,b=1)
+ abline(a=(rr[2]-rr[1])*0.1,b=1,lty=2)
+ abline(a=-(rr[2]-rr[1])*0.1,b=1,lty=2)
+ box()
+ }
+}
diff --git a/R/matchit2exact.R b/R/matchit2exact.R
new file mode 100644
index 0000000..8ccb445
--- /dev/null
+++ b/R/matchit2exact.R
@@ -0,0 +1,22 @@
+matchit2exact <- function(treat, X, data, distance, discarded, verbose=FALSE, ...){
+
+ if(verbose)
+ cat("Exact matching... \n")
+
+ n <- length(treat)
+ xx <- apply(X, 1, function(x) paste(x, collapse = "\r"))
+ xx1 <- xx[treat==1]
+ xx0 <- xx[treat==0]
+ cc <- unique(xx1)
+ cc <- cc[cc%in%xx0]
+ ncc <- length(cc)
+
+ psclass <- rep(NA,n)
+ names(psclass) <- names(treat)
+ for(i in 1:ncc)
+ psclass[xx==cc[i]] <- i
+
+ res <- list(subclass = psclass, weights = weights.subclass(psclass, treat))
+ class(res) <- c("matchit.exact", "matchit")
+ return(res)
+}
diff --git a/R/matchit2full.R b/R/matchit2full.R
new file mode 100644
index 0000000..6c7939c
--- /dev/null
+++ b/R/matchit2full.R
@@ -0,0 +1,37 @@
+matchit2full <- function(treat, X, data, distance, discarded,
+ verbose=FALSE, ...) {
+ if (!("optmatch" %in% .packages(all = TRUE)))
+ install.packages("optmatch", contriburl="http://www.stat.lsa.umich.edu/~bbh/optmatch")
+ require(optmatch)
+
+ if(verbose)
+ cat("Full matching... \n")
+
+ ## full matching for undiscarded units
+ ttt <- treat[!discarded]
+ n0 <- length(ttt[ttt==0])
+ n1 <- length(ttt[ttt==1])
+ d1 <- distance[ttt==1]
+ d0 <- distance[ttt==0]
+ d <- matrix(0, ncol=n0, nrow=n1)
+ rownames(d) <- names(ttt[ttt==1])
+ colnames(d) <- names(ttt[ttt==0])
+ for (i in 1:n1)
+ d[i,] <- abs(d1[i]-d0)
+ full <- fullmatch(d, ...)
+ psclass <- full[pmatch(names(ttt), names(full))]
+ psclass <- as.numeric(as.factor(psclass))
+ names(psclass) <- names(ttt)
+
+ ## add psclass = NA for discarded units
+ if (any(discarded)) {
+ tmp <- rep(NA, sum(discarded))
+ names(tmp) <- names(treat[discarded])
+ psclass <- c(psclass, tmp)[names(treat)]
+ }
+
+ ## calculate weights and return the results
+ res <- list(subclass = psclass, weights = weights.subclass(psclass, treat))
+ class(res) <- c("matchit.full", "matchit")
+ return(res)
+}
diff --git a/R/matchit2genetic.R b/R/matchit2genetic.R
new file mode 100644
index 0000000..9aedf33
--- /dev/null
+++ b/R/matchit2genetic.R
@@ -0,0 +1,47 @@
+matchit2genetic <- function(treat, X, data, distance, discarded,
+ ratio = 1, verbose = FALSE, ...) {
+ if (!("rgenoud" %in% .packages(all = TRUE)))
+ install.packages("rgenoud")
+ require(rgenoud)
+
+ if (!("Matching" %in% .packages(all = TRUE)))
+ install.packages("Matching")
+ require(Matching)
+
+ if (verbose)
+ cat("Genetic matching... \n")
+
+ tt <- treat[!discarded]
+ n <- length(tt)
+ n1 <- length(tt[tt==1])
+ xx <- X[!discarded,]
+ dd <- distance[!discarded]
+ tind <- (1:n)[tt==1]
+ cind <- (1:n)[tt==0]
+ labels <- names(tt)
+ tlabels <- names(tt[tt==1])
+ clabels <- names(tt[tt==0])
+ out <- GenMatch(tt, cbind(dd, xx), M = ratio, ...)$matches
+ ## ratio matching does not seem to work with GenMatch
+ mm <- matrix(0, nrow = n1, ncol = max(table(out[,1])), dimnames =
+ list(tlabels, 1:max(table(out[,1]))))
+ for (i in 1:n1) {
+ tmp <- labels[c(out[out[,1]==tind[i],2:(ratio+1)])]
+ if (length(tmp) < ncol(mm))
+ tmp <- c(tmp, rep(NA, ncol(mm)-length(tmp)))
+ mm[i,] <- tmp
+ }
+
+ if (any(discarded)) {
+ tdisc <- discarded[treat==1]
+ tmp <- matrix(NA, nrow = sum(tdisc), ncol = ncol(mm), dimnames =
+ list(names(treat[treat == 1 & discarded]),
+ 1:ncol(mm)))
+ mm <- as.matrix(rbind(mm, tmp)[names(treat[treat==1]),])
+ }
+
+ res <- list(match.matrix = mm, weights = weights.matrix(mm, treat,
+ discarded))
+ class(res) <- "matchit"
+ return(res)
+}
diff --git a/R/matchit2nearest.R b/R/matchit2nearest.R
new file mode 100644
index 0000000..cc4988b
--- /dev/null
+++ b/R/matchit2nearest.R
@@ -0,0 +1,246 @@
+matchit2nearest <- function(treat, X, data, distance, discarded,
+ ratio=1, replace = FALSE, m.order = "largest",
+ caliper = 0, calclosest = FALSE,
+ mahvars = NULL, exact = NULL,
+ subclass=NULL, verbose=FALSE, sub.by=NULL, ...){
+
+ if(verbose)
+ cat("Nearest neighbor matching... \n")
+
+ #replace
+ if(!(identical(replace,TRUE) | identical(replace,FALSE))){
+ warning("replace=",replace," is invalid; used replace=FALSE instead",call.=FALSE);replace=FALSE}
+ #m.order
+ if(!(identical(m.order,"largest") | identical(m.order,"smallest") |
+ identical(m.order,"random"))){
+ warning("m.order=",m.order," is invalid; used m.order='largest' instead",call.=FALSE);m.order="largest"}
+ #ratio
+ ratio <- round(ratio)
+ if(!is.numeric(ratio) | ratio[1]<1 | !identical(round(length(ratio)),1)){
+ warning("ratio=",ratio," is invalid; used ratio=1 instead",call.=FALSE);ratio=1}
+ #caliper
+ if(!is.vector(caliper) | !identical(round(length(caliper)),1)){
+ warning("caliper=",caliper," is invalid; Caliper matching not done",call.=FALSE);caliper=0}
+ if(caliper<0){
+ warning("caliper=",caliper," is less than 0; Caliper matching not done",call.=FALSE);caliper=0}
+ #calclosest
+ if(!(identical(calclosest,TRUE)| identical(calclosest,FALSE))){
+ warning("calclosest=",calclosest," is invalid; used calclosest=FALSE instead",call.=FALSE)
+ calclosest=FALSE}
+ #mahvars & caliper
+ if (!is.null(mahvars) & caliper[1]==0){
+ warning("No caliper size specified for Mahalanobis matching. Caliper=.25 used.",call. = FALSE);caliper=.25}
+
+ # Sample sizes, labels
+ n <- length(treat)
+ n0 <- length(treat[treat==0])
+ n1 <- length(treat[treat==1])
+ d1 <- distance[treat==1]
+ d0 <- distance[treat==0]
+
+ if(is.null(names(treat)))
+ names(treat) <- 1:n
+ labels <- names(treat)
+ tlabels <- names(treat[treat==1])
+ clabels <- names(treat[treat==0])
+ in.sample <- !discarded
+ names(in.sample) <- labels
+
+ ## Generating match matrix
+ match.matrix <- matrix(0, nrow=n1, ncol=ratio, dimnames=list(tlabels, 1:ratio))
+
+ ## Vectors of whether unit has been matched:
+ ## = 0 if not matched (unit # of match if matched)
+ ## = -1 if can't be matched (if in.sample=0)
+ matchedc <- rep(0,length(d0))
+ names(matchedc) <- clabels
+
+ ## These are the units that are ineligible because of discard
+ ## (in.sample==0)
+ matchedc[in.sample[clabels]==0] <- -1
+ match.matrix[in.sample[tlabels]==0,] <- -1
+ matchedt <- match.matrix[,1]
+ names(matchedt) <- tlabels
+
+ ## total number of matches (including ratios) = ratio * n1
+ tr <- length(match.matrix[match.matrix!=-1])
+ r <- 1
+
+ ## Caliper for matching (=0 if caliper matching not done)
+ sd.cal <- caliper*sqrt(var(distance[in.sample==1]))
+
+ ## Var-covar matrix for Mahalanobis (currently set for full sample)
+ if (!is.null(mahvars)) {
+ if(!sum(mahvars%in%names(data))==length(mahvars)) {
+ warning("Mahvars not contained in data. Mahalanobis matching not done.",call.=FALSE)
+ mahvars=NULL
+ }
+ else { ww <- mahvars%in%dimnames(X)[[2]]
+ nw <- length(mahvars)
+ mahvars <- data[,mahvars,drop=F]
+ Sigma <- var(mahvars)
+ if(sum(ww)!=nw){
+ X <- cbind(X,mahvars[!ww])
+ }
+ mahvars <- as.matrix(mahvars)
+ }
+ }
+
+ ## Now for exact matching within nearest neighbor
+ ## exact should not equal T for this type of matching--that would get sent to matchit2exact
+ if (!is.null(exact)){
+ if(!sum(exact%in%names(data))==length(exact)) {
+ warning("Exact variables not contained in data. Exact matching not done.",call.=FALSE)
+ exact=NULL
+ }
+ else {
+ ww <- exact%in%dimnames(X)[[2]]
+ nw <- length(exact)
+ exact <- data[,exact,drop=F]
+ if(sum(ww)!=nw){
+ X <- cbind(X,exact[!ww])
+ }
+ }
+ }
+
+ ## Looping through nearest neighbour matching for all treatment units
+ ## Only do matching for units with in.sample==1 (matched!=-1)
+ if(verbose){
+ trseq <- floor(seq(tr/10,tr,tr/10))
+ cat("Matching Treated: ")
+ }
+
+ for(i in 1:tr){
+ ## Make new matchedc column to be used for exact matching
+ ## Will only be 0 (eligible for matching) if it's an exact match
+ if(verbose) {if(i%in%trseq){cat(10*which(trseq==i),"%...",sep="")}} # a counter
+ matchedc2 <- matchedc
+ ##in cases there's no replacement and all controls have been used up
+ if(!0%in%matchedc2){
+ match.matrix[match.matrix[,r]==0 & !is.na(match.matrix[,r]),r] <- NA
+ if(r<ratio){match.matrix[,(r+1):ratio] <- NA}
+ break
+ }
+
+ ##in case there's replacement, but all units have been used in
+ ##previous ratios
+ if(sum(!is.na(match.matrix[,r]))==0){
+ if(r<ratio){match.matrix[,(r+1):ratio] <- NA}
+ break
+ }
+
+ ## Which ratio we're on
+ if(r!=ceiling(i/(tr/ratio))) {r <- r+1; matchedt <- match.matrix[,r]}
+
+ if(m.order=="largest") {iterd1 <- max(d1[matchedt==0],na.rm=T)}
+ if(m.order=="smallest") {iterd1 <- min(d1[matchedt==0],na.rm=T)}
+ if(m.order=="random") {iterd1 <- sample(d1[matchedt==0][!is.na(d1[matchedt==0])],1)}
+
+ ## The treatment unit for this iteration, again resolving ties randomly
+ itert <- as.vector(na.omit(tlabels[iterd1==d1 & matchedt==0]))
+ if(length(itert)>1){itert <- sample(itert,1)}
+
+ ## Calculating all the absolute deviations in propensity scores
+ ## Calculate only for those eligible to be matched (matchedc==0)
+ ## this first if statement only applies to replacement ratio
+ ## matching, so that each treatment unit is matched to a different
+ ## control unit than from the previous round
+
+ ## match number = NA if no units within caliper
+
+ ## Set things up for exact matching
+ ## Make matchedc2==-2 if it isn't an exact match
+ ## There might be a more efficient way to do this, but I couldn't figure
+ ## out another way to compare a vector with the matrix
+ if (!is.null(exact)) {
+ for (k in 1:dim(exact)[2]) matchedc2[exact[itert,k]!=exact[clabels,k]] <- -2
+ }
+
+ ## Need to add a check in case there aren't any eligible matches left...
+ if(replace & r!=1) {
+ if (sum(!clabels%in%match.matrix[itert,(1:r-1)] & matchedc2==0)==0) {
+ deviation <- NULL
+ mindev <- NA
+ }
+ else
+ deviation <- abs(d0[!clabels%in%match.matrix[itert,(1:r-1)] & matchedc2==0]-iterd1)
+ }
+ else {
+ if (sum(matchedc2==0)==0) {
+ deviation <- NULL
+ mindev <- NA
+ }
+ else deviation <- abs(d0[matchedc2==0]-iterd1)
+ }
+
+ if (caliper!=0 & (!is.null(deviation))) {
+ if(replace & r!=1)
+ pool <- clabels[!clabels%in%match.matrix[itert,(1:r-1)]
+ & matchedc2==0][deviation <= sd.cal]
+ else
+ pool <- clabels[matchedc2==0][deviation <= sd.cal]
+ if(length(pool)==0) {
+ if (calclosest==FALSE) mindev <- NA
+ else {
+ if (replace & r!= 1){
+ mindev <- clabels[!clabels%in%match.matrix[itert,(1:r-1)]][min(deviation)==deviation]
+ } else{mindev <- clabels[matchedc2==0][min(deviation)==deviation]}
+ }
+ }
+ else if (length(pool)==1) mindev <- pool[1]
+ else if (is.null(mahvars)) mindev <- sample(pool, 1)
+ else {
+ ## This has the important vars for the C's within the caliper
+ poolvarsC <- mahvars[pool,,drop=F]
+ ## Sigma is the full group var/covar matrix of Mahalvars
+ mahal <- mahalanobis(poolvarsC, mahvars[itert,],Sigma)
+ mindev <- pool[mahal==min(mahal)]
+ }
+ }
+ else if(!is.null(deviation)) {
+ if (replace & r!=1){
+ mindev <- clabels[!clabels%in%match.matrix[itert,(1:r-1)] & matchedc2==0][min(deviation)==deviation]
+ } else {mindev <- clabels[matchedc2==0][min(deviation)==deviation]}
+ }
+
+ ## Resolving ties in minimum deviation by random draw
+ if(length(mindev)>1){goodmatch <- sample(mindev,1)} else goodmatch <- mindev
+
+ ## Storing which treatment unit has been matched to control, and
+ ## vice versa
+ matchedt[itert==tlabels] <- goodmatch
+ matchedc[goodmatch==clabels] <- itert
+
+ ## instead of the in.sample, we now have an index with dimensions n1 by # of
+ ## matches (ratio)
+ match.matrix[which(itert==tlabels),r] <- goodmatch
+
+ ## If matching with replacement, set matchedc back to 0 so it can be reused
+ if (replace) matchedc[goodmatch==clabels] <- 0
+
+ }
+ if(verbose){cat("Done\n")}
+
+ x <- as.matrix(match.matrix)
+ x[x==-1] <- NA
+
+
+ ## Calculate weights and return the results
+ res <- list(match.matrix = match.matrix, weights =
+ weights.matrix(match.matrix, treat, discarded), X=X)
+
+ ## Subclassifying
+ if(!is.null(subclass)){
+ if(is.null(sub.by)) sub.by="treat"
+ psres <- matchit2subclass(treat,X,data,distance,discarded,
+ match.matrix=match.matrix,
+ subclass=subclass,
+ verbose=verbose, sub.by=sub.by, ...)
+ res$subclass <- psres$subclass
+ res$q.cut <- psres$q.cut
+ class(res) <- c("matchit.subclass", "matchit")
+ } else{
+ class(res) <- "matchit"
+ }
+ return(res)
+}
diff --git a/R/matchit2optimal.R b/R/matchit2optimal.R
new file mode 100644
index 0000000..e575ee9
--- /dev/null
+++ b/R/matchit2optimal.R
@@ -0,0 +1,56 @@
+matchit2optimal <- function(treat, X, data, distance, discarded,
+ ratio = 1, verbose=FALSE, ...) {
+
+ if (!("optmatch" %in% .packages(all = TRUE)))
+ install.packages("optmatch", contriburl="http://www.stat.lsa.umich.edu/~bbh/optmatch")
+ require(optmatch)
+
+ if(verbose)
+ cat("Optimal matching... \n")
+
+ ## optimal matching for undiscarded units
+ ttt <- treat[!discarded]
+ n0 <- length(ttt[ttt==0])
+ n1 <- length(ttt[ttt==1])
+ d1 <- distance[ttt==1]
+ d0 <- distance[ttt==0]
+ d <- matrix(0, ncol=n0, nrow=n1)
+ tlabels <- rownames(d) <- names(ttt[ttt==1])
+ clabels <- colnames(d) <- names(ttt[ttt==0])
+ for (i in 1:n1)
+ d[i,] <- abs(d1[i]-d0)
+ full <- fullmatch(d, min.controls = ratio,
+ max.controls = ratio,
+ omit.fraction = (n0-ratio*n1)/n0, ...)
+ psclass <- full[pmatch(names(ttt), names(full))]
+ psclass <- as.numeric(as.factor(psclass))
+ names(psclass) <- names(ttt)
+
+ mm <- matrix(0, nrow = n1, ncol = ratio, dimnames = list(tlabels, 1:ratio))
+ for (i in 1:n1)
+ mm[i,] <- names(which(psclass[tlabels[i]] == psclass[-pmatch(tlabels[i],
+ names(psclass))]))
+
+ if (any(discarded)) {
+ ## add psclass = NA for discarded units
+ tmp <- rep(NA, sum(discarded))
+ names(tmp) <- names(treat[discarded])
+ psclass <- c(psclass, tmp)[names(treat)]
+
+ ## add match.matrix = NA for discarded units
+ tdisc <- discarded[treat==1]
+ if (any(tdisc)) {
+ tmp <- matrix(NA, nrow = sum(tdisc), ncol= ratio,
+ dimnames = list(names(treat[treat==1 & discarded]),
+ 1:ratio))
+ mm <- as.matrix(rbind(mm, tmp)[names(treat[treat==1]),])
+ }
+ }
+
+ ## calculate weights and return the results
+ res <- list(match.matrix = mm, subclass = psclass,
+ weights = weights.matrix(mm, treat, discarded))
+
+ class(res) <- "matchit"
+ return(res)
+}
diff --git a/R/matchit2subclass.R b/R/matchit2subclass.R
new file mode 100644
index 0000000..b36f891
--- /dev/null
+++ b/R/matchit2subclass.R
@@ -0,0 +1,90 @@
+matchit2subclass <- function(treat, X, data, distance, discarded,
+ match.matrix=NULL, subclass=6, sub.by="treat",
+ verbose = FALSE){
+
+ if(verbose)
+ cat("Subclassifying... \n")
+
+ # sub.by
+ if(!is.vector(sub.by)|length(sub.by)!=1){
+ warning(sub.by," is not a valid sub.by option; sub.by=\"treat\" used instead",call.=FALSE); sub.by <- "treat"}
+ if(!sub.by%in%c("treat","control","all")){
+ warning(sub.by, " is not a valid sub.by option; sub.by=\"treat\" used instead",call.=FALSE); sub.by <- "treat"}
+
+ #subclass
+ if(length(subclass)==1){
+ if(subclass<0 | subclass==1){
+ warning(subclass, " is not a valid subclass; subclass=6 used instead",call.=FALSE); subclass <- 6}
+ } else {
+ if(!is.vector(subclass)){
+ warning(subclass, " is not a valid subclass; subclass=6 used instead",call.=FALSE); subclass <- 6}
+ if(sum(subclass<=1 & subclass>=0)!=length(subclass)){
+ warning("Subclass ", subclass, " is not bounded by 0 and 1; subclass=6 used instead",
+ call.=FALSE); subclass <- 6}
+ }
+
+ in.sample <- !discarded
+ n <- length(treat)
+ ## Matching & Subclassification
+ if(!is.null(match.matrix)){
+ #match.matrix <- match.matrix[in.sample[treat==1],,drop=F]
+ t.units <- row.names(match.matrix)[in.sample[treat==1]==1]
+ c.units <- na.omit(as.vector(as.matrix(match.matrix)))
+ matched <-c(t.units,c.units)
+ matched <- names(treat)%in%matched
+ } else
+ matched <- rep(TRUE,n)
+ names(matched) <- names(treat)
+ m1 <- matched[treat==1]
+ m0 <- matched[treat==0]
+ p1 <- distance[treat==1][m1]
+ p0 <- distance[treat==0][m0]
+ ## Settting Cut Points
+ if(length(subclass)!=1 | (length(subclass)==1 & all(subclass<1))) {
+ subclass <- sort(subclass)
+ if (subclass[1]==0)
+ subclass <- subclass[-1]
+ if (subclass[length(subclass)]==1)
+ subclass <- subclass[-length(subclass)]
+ if(sub.by=="treat")
+ q <- c(0,quantile(p1,probs=c(subclass)),1)
+ else if(sub.by=="control")
+ q <- c(0,quantile(p0,probs=c(subclass)),1)
+ else if(sub.by=="all")
+ q <- c(0,quantile(distance,probs=c(subclass)),1)
+ else
+ stop("Invalid input for sub.by")
+ }
+ else {
+ if(subclass<=0){stop("Subclass must be a positive vector",call.=FALSE)}
+ sprobs <- seq(0,1,length=(round(subclass)+1))
+ sprobs <- sprobs[2:(length(sprobs)-1)]
+ if(sub.by=="treat")
+ q <- c(0,quantile(p1,probs=sprobs,na.rm=TRUE),1)
+ else if(sub.by=="control")
+ q <- c(0,quantile(p0,probs=sprobs,na.rm=TRUE),1)
+ else if(sub.by=="all")
+ q <- c(0,quantile(distance,probs=sprobs,na.rm=TRUE),1)
+ else
+ stop("Must specify a valid sub.by",call.=FALSE)
+ }
+ ## Calculating Subclasses
+ qbins <- length(q)-1
+ psclass <- rep(0,n)
+ names(psclass) <- names(treat)
+ for (i in 1:qbins){
+ q1 <- q[i]
+ q2 <- q[i+1]
+ psclass <- psclass+i*as.numeric(distance<q2 & distance>=q1)
+ }
+
+ ## No subclass for discarded or unmatched units
+ psclass[in.sample==0] <- NA
+ psclass[!matched] <- NA
+ if(verbose){cat("Done\n")}
+ res <- list(subclass = psclass, q.cut = q,
+ weights = weights.subclass(psclass, treat))
+
+ class(res) <- c("matchit.subclass", "matchit")
+ return(res)
+}
diff --git a/R/neyman.R b/R/neyman.R
deleted file mode 100644
index f1f1bf9..0000000
--- a/R/neyman.R
+++ /dev/null
@@ -1,164 +0,0 @@
-neyman<-function(Y, object, bootstrap=NULL, counter=TRUE){
- mf<-match.call()
- matchit.call <- object$call
- weighted.var<-function(x, w)
- sum(w*(x-weighted.mean(x, w))^2)/(sum(w)-1)
- est<-function(Y, object, data){
- treat<-eval(object$treat, data)
- Y<-eval(mf$Y, data)
- W<-object$psweights
- indx<-object$psclass
- if(is.null(indx)){
- m<-1
- indx<-rep(1, length(Y))
- } else {
- m<-max(na.omit(indx))
- }
- ate.est<-ate.var<-Ntrt<-Ncont<-matrix(NA, nrow=1, ncol=m)
- for(i in 1:m){
- Ntrt[i]<-length(Y[treat==1 & indx==i])
- Ncont[i]<-length(Y[treat==0 & indx==i])
- ate.est[i]<-weighted.mean(Y[treat==1 & indx==i],
- W[treat==1 & indx==i]) -
- weighted.mean(Y[treat==0 & indx==i],
- W[treat==0 & indx==i])
- ate.var[i]<-weighted.var(Y[treat==1 & indx==i],
- W[treat==1 & indx==i])/sum(W[treat==1 & indx == i]) +
- weighted.var(Y[treat==0 & indx==i],
- W[treat==0 & indx==i])/sum(W[treat==0 & indx == i])
- }
- if(m==1) {
- Ntrt=length(Y[treat==1 & W!=0])
- Ncont=length(Y[treat==0 & W!=0])
- } else {
- colnames(ate.est)<-c(1:m)
- for(i in 1:m)
- colnames(ate.est)[i]<-paste("Subclass", i)
- }
- list(ate.est=ate.est, ate.var=ate.var, Ntrt=Ntrt, Ncont=Ncont, bootstrap=NULL)
- }
-
- if(class(object)!="matchit"){
- stop("the input object is not a matchit object")
- }
-
- data<-eval(object$data, sys.parent())
-
- if (identical(eval(matchit.call$exact), TRUE)) {
- Y <- eval(mf$Y, object$data)
- W <- object$psweights
- treat <- eval(object$treat, object$data)
-
- ate <- weighted.mean(Y[treat==1], W[treat==1]) -
- weighted.mean(Y[treat==0], W[treat==0])
- se <- sqrt(weighted.var(Y[treat==1], W[treat==1])/sum(W[treat==1])
- + weighted.var(Y[treat==0], W[treat==0])/sum(W[treat==0]))
- numtrt <- sum(W[treat==1])
- numcont <- sum(W[treat==0])
- }
- else {
- if(is.null(bootstrap)) {
- res<-est(Y, object, data)
- } else {
- ate.est<-Ntrt<-Ncont<-NULL
- cl<-object$call
- cl$counter<-FALSE
- cl$data<-as.name("dta")
- if(counter) {
- bseq<-floor(seq(bootstrap/10, bootstrap, bootstrap/10))
- }
- for(i in 1:bootstrap){
- dta<-data[sample(c(1:nrow(data)), size=nrow(data),
- replace=T),!names(data) %in% c("pscore", "psweights", "psclass")]
- bobject<-eval(cl)
- tmp<-est(Y,bobject,dta)
- ate.est<-rbind(ate.est, tmp$ate.est)
- Ntrt<-rbind(Ntrt, tmp$Ntrt)
- Ncont<-rbind(Ncont, tmp$Ncont)
- if(counter & i%in%bseq)
- cat(10*which(bseq==i),"%...", sep="")
- }
- cat("Done\n")
- row.names(ate.est)<-c(1:bootstrap)
- res<-list(ate.est=ate.est, ate.var=NULL, Ntrt=Ntrt, Ncont=Ncont, bootstrap=bootstrap)
- }
-
- # Subclass estimates calculated above...now aggregate
- if(is.null(object$call$sub.by)){
- sub.by <- "treat"
- } else {
- sub.by <- object$call$sub.by
- }
- if(!is.null(bootstrap)){
- if(is.null(object$call$subclass) | identical(object$call$subclass,0)){
- ate<-apply(res$ate.est, 2, mean)
- numtrt <- apply(res$Ntrt, 2, mean)
- numcont <- apply(res$Ncont, 2, mean)
- } else{
- if(sub.by=="treat"){
- w<-res$Ntrt/apply(res$Ntrt,1,sum)
- } else if(sub.by=="control"){
- w<-res$Ncont/apply(res$Ncont,1,sum)
- } else if(sub.by=="all"){
- w <- (res$Ncont+res$Ntrt)/apply((res$Ncont+res$Ntrt),1,sum)
- }
- # Set w to 0 if block has fewer then 2 t or c units
- w[res$Ntrt<=1 | res$Ncont<=1] <- rep(0, sum(res$Ntrt<=1 | res$Ncont<=1))
- ate.boot <- NULL
- for(i in 1:bootstrap){
- ate.boot<-c(ate.boot,weighted.mean(res$ate.est[i,],w[i,]))
- }
- ate<-c(mean(ate.boot),apply(res$ate.est, 2, mean))
- numtrt <- c(mean(apply(res$Ntrt, 1, sum)), apply(res$Ntrt, 2, mean))
- numcont <- c(mean(apply(res$Ncont, 1, sum)), apply(res$Ncont, 2, mean))
- }
- }
- else {
- if(sub.by=="treat"){
- w<-res$Ntrt/sum(res$Ntrt)
- } else if(sub.by=="control"){
- w<-res$Ncont/sum(res$Ncont)
- } else if(sub.by=="all"){
- w <- (res$Ncont+res$Ntrt)/sum(res$Ncont+res$Ntrt)
- }
- # Set w to 0 if block has fewer then 2 t or c units
- w[res$Ntrt<=1 | res$Ncont<=1] <- rep(0, sum(res$Ntrt<=1 | res$Ncont<=1))
- ate<-apply(res$ate.est, 2, weighted.mean, w)
- if (length(res$Ntrt)> 1) {
- numtrt <- c(sum(res$Ntrt), res$Ntrt)
- numcont <- c(sum(res$Ncont), res$Ncont)
- } else {
- numtrt <- res$Ntrt
- numcont <- res$Ncont
- }
- }
- if(is.null(res$ate.var)){
- if(is.null(object$call$subclass) | identical(object$call$subclass,0)){
- se<-sqrt(apply(res$ate.est, 2, var))
- } else {
- se <- c(sd(ate.boot),sqrt(apply(res$ate.est, 2, var)))
- }
- } else{
- se<-sqrt(res$ate.var)
- }
- if(is.null(bootstrap)){
- if(length(ate)>1){
- if(is.null(res$ate.var)){
- tmp<-apply(res$ate.est, 1, weighted.mean, w)
- ate<-c(mean(tmp), ate)
- se<-c(sqrt(var(tmp)), se)
- }
- else{
- # weight=0 if block has < 2 units in either group
- ate<-c(weighted.mean(ate, w, na.rm=T), ate)
- # Only calculate se for blocks with > 1 unit (se!=nan)
- se<-c(sqrt(sum((w[!is.nan(se)]*se[!is.nan(se)])^2)), se)
- }
- }
- }
- }
-
- ressum<-list(ate=ate, se=se, Ntrt=numtrt, Ncont=numcont)
- class(ressum) <- "neyman"
- ressum
-}
diff --git a/R/plot.matchit.R b/R/plot.matchit.R
index b3807e5..8213cf9 100644
--- a/R/plot.matchit.R
+++ b/R/plot.matchit.R
@@ -1,203 +1,17 @@
-plot.matchit <- function(x, ...)
- {
- if (identical(eval(x$call$exact),TRUE)){
- return(warning("Plot() not appropriate for exact matching. No plots generated"))
- }
- match.matrix <- x$match.matrix
- xdata <- x$data
- treata <- model.frame(x$formula,xdata)[,1,drop=FALSE]
- pscore <- xdata[,"pscore"]
- in.sample <- x$in.sample
- covariates <- x$covariates
- treat <- as.vector(treata[,1])
- names(treat) <- row.names(treata)
- covariates <- model.frame(delete.response(terms(x$formula)),xdata)[,,drop=FALSE]
- mahvars <- eval(x$call$mahvars)
- exact <- eval(x$call$exact)
-
- if (!is.null(mahvars)){
- w <- mahvars%in%names(covariates)
- if(sum(w)!=length(mahvars)){
- md <- as.data.frame(as.matrix(xdata[,mahvars[!w]]))
- names(md) <- mahvars[!w]
- covariates <- cbind(covariates,md)
- }
+# Need to account for weights -- how do we do qq plots with weights
+plot.matchit <- function(x, discrete.cutoff=5, type="QQ",
+ numdraws=5000, interactive = T, which.xs =
+ NULL, ...){
+ if ("matchit.exact" %in% class(x)){
+ stop("Not appropriate for exact matching. No plots generated.")
}
- if (!identical(TRUE, exact) & !identical(FALSE, exact)) {
- w <- exact%in%names(covariates)
- if(sum(w)!=length(exact)) {
- ed <- as.data.frame(as.matrix(xdata[,exact[!w]]))
- names(ed) <- exact[!w]
- covariates <- cbind(covariates,ed)
- }
+ if(type=="QQ"){
+ matchit.qqplot(x=x,discrete.cutoff=discrete.cutoff,
+ numdraws=numdraws, interactive=interactive,
+ which.xs = which.xs)
+ } else if(type=="jitter"){
+ jitter.pscore(x, interactive=interactive)
+ } else {
+ stop("Invalid type")
}
- psclass <- x$psclass
- if(is.null(psclass)){subclass <- 0}else{subclass <- qbins <- max(na.omit(x$psclass))}
- nearest <- !is.null(x$match.matrix)
- q.cut <- x$q.cut
- n <- length(treat)
- n0 <- length(treat[treat==0])
- n1 <- length(treat[treat==1])
-
- if(is.null(names(treat))){names(treat) <- seq(1,n)}
- labels <- names(treat)
- clabels <- labels[treat==0]
- tlabels <- labels[treat==1]
- if(is.null(in.sample)){in.sample <- rep(TRUE,n); names(in.sample) <- names(treat)}
- if(nearest==TRUE) {
- match.matrix <- match.matrix[in.sample[treat==1],,drop=F]
- num.matches <- dim(match.matrix)[2]-apply(as.matrix(match.matrix), 1, function(x) { sum(is.na(x)) })
- names(num.matches) <- tlabels[in.sample[treat==1]]
- t.units <- row.names(match.matrix)[num.matches>0]
- c.units <- na.omit(as.vector(as.matrix(match.matrix)))
- matched <-c(t.units,unique(c.units))
- weights <- rep(0,length(treat))
- names(weights) <- labels
- weights[t.units] <- 1
-
- for (cont in clabels) {
- treats <- na.omit(row.names(match.matrix)[cont==match.matrix[,1]])
- if (dim(match.matrix)[2]>1) {
- for (j in 2:dim(match.matrix)[2])
- treats <- c(na.omit(row.names(match.matrix)[cont==match.matrix[,j]]),treats)
- }
- if (length(treats)==0) weights[cont] <- 0
- else for (k in 1:length(treats)) weights[cont] <- weights[cont]+1/num.matches[treats[k]]
- }
-
- } else {
- matched <- names(treat)
- weights <- rep(1,length(treat))
- names(weights) <- names(treat)
- }
-
- weights[!in.sample] <- 0
-
- doverlay <- function(x,treat,weights=NULL,xlab="",main="",lines=FALSE){
- xmiss <- !is.na(x)
- x <- x[xmiss]
- treat <- treat[xmiss]
- weights <- weights[xmiss]
- minobs <- min(x)
- maxobs <- max(x)
- dx1 <- density(x[treat==1],from=minobs,to=maxobs)
- dx0 <- density(x[treat==0],from=minobs,to=maxobs)
- if(!is.null(weights))
- {
- x1 <- x[treat==1&weights!=0]
- x0 <- sample(x[treat==0], size=min(10000,(100*length(x[treat==0]))),
- replace=TRUE,prob=weights[treat==0]/sum(weights[treat==0]))
- d1 <- density(x1,from=minobs,to=maxobs)
- bw <- d1$bw
- d0 <- density(x0,from=minobs,to=maxobs,bw=bw)
- par(mfrow=c(2,1))
- matplot(dx0$x,cbind(dx1$y,dx0$y),type="l",ylim=range(c(dx0$y,dx1$y,d1$y,d0$y)),
- ylab="Density",xlab=xlab,main=paste(main,": All Units",sep=""))
- legend(minobs,max(c(d1$y,d0$y,dx1$y,dx0$y)), lty=1:2, col=1:2,
- legend=c("Treatment","Control"))
- matplot(dx0$x,cbind(d1$y,d0$y),type="l",ylim=range(c(dx0$y,dx1$y,d1$y,d0$y)),
- ylab="Density",xlab=xlab,main=paste(main,": Matched Units",sep=""))
- legend(minobs,max(c(d1$y,d0$y,dx1$y,dx0$y)), lty=1:2, col=1:2,
- legend=c("Treatment","Control"))
- if (lines==T) abline(v=q.cut,col="grey",lty=1)
- par(mfrow=c(1,1))
- } else{
- matplot(dx0$x,cbind(dx1$y,dx0$y),type="l",ylab="Density",xlab=xlab,main=main)
- legend(minobs,max(c(dx1$y,dx0$y)),lty=1:2,col=1:2,legend=c("Treatment","Control"))
- if (lines==T) abline(v=q.cut, col="grey", lty=1)
- }
- }
-
- choice.menu <- function(choices,question)
- {
- k <- length(choices)-1
- Choices <- data.frame(choices)
- row.names(Choices) <- 0:k
- names(Choices) <- "Choices"
- print.data.frame(Choices,right=FALSE)
- ans <- readline(question)
- while(!ans%in%c(0:k))
- {
- print("Not valid -- please pick one of the choices")
- print.data.frame(Choices,right=FALSE)
- ans <- readline(question)
- }
- return(ans)
- }
- if(!is.null(pscore))
- {
- densq <- ("Would you like to see density estimates of the propensity scores?")
- denschoice <- c("No","Yes")
- densplot <- choice.menu(denschoice,densq)
- if(densplot==1){
- if(nearest){
- doverlay(pscore,treat,weights,main="Propensity Score",lines=T)
- #if(length(subclass)!=1 | subclass!=0){abline(v=q.cut,col="grey",lty=1)}
- } else {
- doverlay(pscore,treat,main="Propensity Score", lines=T)
- #if(length(subclass)!=1 | subclass!=0){abline(v=q.cut,col="grey",lty=1)}
- }
- }
-
- jitq <- ("Would you like to see a jitterplot of the propensity scores?")
- jitchoice <- c("No","Yes")
- jitplot <- choice.menu(jitchoice,jitq)
- if(jitplot==1) {
- jitp <- jitter(rep(1,length(treat)),factor=6)-(treat==0)
- cwt <- sqrt(weights)
- plot(pscore,xlim=range(na.omit(pscore)),ylim=c(-1,2),type="n",ylab="",xlab="Propensity Score",axes=F,main="Distribution of Propensity Scores")
- if(length(subclass)!=1 | subclass!=0){abline(v=q.cut,col="grey",lty=1)}
- points(pscore[treat==1&weights!=0],jitp[treat==1&weights!=0],pch=18,cex=cwt[treat==1&weights!=0])
- points(pscore[treat==1&weights==0],jitp[treat==1&weights==0],pch=5,col="grey",cex=0.5)
- points(pscore[treat==0&weights!=0],jitp[treat==0&weights!=0],pch=18,cex=cwt[treat==0&weights!=0])
- points(pscore[treat==0&weights==0],jitp[treat==0&weights==0],pch=5,col="grey",cex=0.5)
- axis(1)
- text(sum(range(na.omit(pscore)))/2,1.5,"Treatment Units")
- text(sum(range(na.omit(pscore)))/2,-0.5,"Control Units")
- box()
- print("To identify the units, use first mouse button; to stop, use second.")
- identify(pscore,jitp,names(treat))
- }
- }
-
- if(nearest){
- choices <- c("No",paste("Yes : ",names(covariates)))
- question <- "Would you like to see density estimates of any other covariates?"
- ans <- -1
- while(ans!=0)
- {
- ans <- as.numeric(choice.menu(choices,question))
- if(ans!=0)
- if(nearest)
- {
- doverlay(covariates[,(ans)],treat,weights,main=names(covariates)[ans])
- } else {
- doverlay(covariates[,(ans)],treat,main=names(covariates)[ans])
- }
- }
- }
- if(length(subclass)!=1 | subclass!=0){
- choices <- c("No",paste("Yes : Subclass ", 1:qbins))
- question <- "Would you like to see density estimates of any subclass covariates?"
- ans <- -1
- while(ans!=0)
- {
- ans <- as.numeric(choice.menu(choices,question))
- if(ans!=0)
- {
- question2 <- "Which covariates?"
- choices2 <- c(paste(names(covariates)))
- ans2 <- as.numeric(choice.menu(choices2,question2))
- ans2 <- ans2+1
- if(sum(treat[psclass==ans]==1,na.rm=TRUE)<=2){
- print("Not enough treatment units in this subclass")
- } else if(sum(treat[psclass==ans]==0,na.rm=TRUE)<=2){
- print("Not enough control units in this subclass")} else{
- doverlay(covariates[,(ans2)][psclass==ans],treat[psclass==ans],main=paste("Subclass ",ans," : ",names(covariates)[ans2]))}
- }
- }
-
- }
-
- }
-
+}
diff --git a/R/plot.matchit.subclass.R b/R/plot.matchit.subclass.R
new file mode 100644
index 0000000..01b9912
--- /dev/null
+++ b/R/plot.matchit.subclass.R
@@ -0,0 +1,43 @@
+plot.matchit.subclass <- function(x, discrete.cutoff=5,
+ type="QQ", interactive = T,
+ subclass = NULL, which.xs=NULL,...){
+ choice.menu <- function(choices,question)
+ {
+ k <- length(choices)-1
+ Choices <- data.frame(choices)
+ row.names(Choices) <- 0:k
+ names(Choices) <- "Choices"
+ print.data.frame(Choices,right=FALSE)
+ ans <- readline(question)
+ while(!ans%in%c(0:k))
+ {
+ print("Not valid -- please pick one of the choices")
+ print.data.frame(Choices,right=FALSE)
+ ans <- readline(question)
+ }
+ return(ans)
+ }
+ if(type=="QQ"){
+ if(interactive){
+ choices <- c("No",paste("Yes : Subclass ", 1:max(x$subclass,na.rm=T)))
+ question <- "Would you like to see quantile-quantile plots of any subclasses?"
+ ans <- -1
+ while(ans!=0)
+ {
+ ans <- as.numeric(choice.menu(choices,question))
+ if(ans!=0)
+ {
+ matchit.qqplot(x,discrete.cutoff,which.subclass=ans,
+ interactive = interactive, which.xs=which.xs)
+ }
+ }
+ } else {
+ matchit.qqplot(x,discrete.cutoff,which.subclass=subclass,
+ interactive=interactive, which.xs=which.xs)
+ }
+ } else if(type=="jitter"){
+ jitter.pscore(x, interactive=interactive)
+ } else {
+ stop("Invalid type")
+ }
+}
diff --git a/R/print.matchit.R b/R/print.matchit.R
index eb9fae4..38f2ffb 100644
--- a/R/print.matchit.R
+++ b/R/print.matchit.R
@@ -1,146 +1,24 @@
-print.matchit <- function(x, digits = max(3, getOption("digits")-3), ...)
- {
- t.test.wtd <- function(x, treat, weights) {
- trt <- cov.wt(as.matrix(x[treat==1]), wt=weights[treat==1])
- mean.t <- trt$center
- cov.t <- trt$cov
- cont <- cov.wt(as.matrix(x[treat==0]), wt=weights[treat==0])
- mean.c <- cont$center
- cov.c <- cont$cov
- n.t <- sum(weights[treat==1])
- n.c <- sum(weights[treat==0])
- ratio.t <- cov.t/n.t
- ratio.c <- cov.c/n.c
- tt <- (mean.t-mean.c)/sqrt(ratio.t+ratio.c)
- }
- weighted.var <- function(x, w) {
- sum(w * (x - weighted.mean(x,w))^2)/(sum(w) - 1)}
- qoi <- function(xx,tt,ww,psc = NULL, full = F){
- xsum <- matrix(0,2,5)
- xsum <- as.data.frame(xsum)
- row.names(xsum) <- c("Full","Matched")
- names(xsum) <- c("Means Treated","Means Control","SD","T-stat","Bias")
- xn <- matrix(0,2,3)
- xn <- as.data.frame(xn)
- row.names(xn) <- c("Full","Matched")
- names(xn) <- c("Treated","Control","Total")
- xn[1,1] <- sum(tt==1)
- xn[1,2] <- sum(tt==0)
- xn[1,3] <- length(tt)
- x1 <- xx[tt==1]
- x0 <- xx[tt==0]
- xsum[1,1] <- mean(x1,na.rm=T)
- xsum[1,2] <- mean(x0,na.rm=T)
- if(!full){
- X.t.m <- xx[tt==1][ww[tt==1]>0]
- X.c.m <- xx[tt==0][ww[tt==0]>0]
- xsum[2,1] <- weighted.mean(X.t.m, ww[tt==1][ww[tt==1]>0])
- xsum[2,2] <- weighted.mean(X.c.m, ww[tt==0][ww[tt==0]>0])
- if(sum(tt==1)<2|(sum(tt==0)<2)){
- xsum[c(1,2),c(3,4,5)] <- NA
- } else {
- xsum[1,3] <- sd(xx,na.rm=T)
- if (is.null(x$call$exact))
- xsum[1,4] <- -1*t.test(xx~tt)$statistic
- else if(is.logical((eval(x$call$exact))))
- if(eval(x$call$exact))
- xsum[1,4] <- 0
- else
- xsum[1,4] <- -1*t.test(xx~tt)$statistic
- else
- xsum[1,4] <- -1*t.test(xx~tt)$statistic
- xsum[1,5] <- (mean(x1,na.rm=T)-mean(x0,na.rm=T))/sd(x1,na.rm=T)
- xsum[2,3] <- sqrt(weighted.var(xx[ww>0],ww[ww>0]))
- xsum[2,4] <- t.test.wtd(xx[ww>0],tt[ww>0],ww[ww>0])
- xsum[2,5] <- (xsum[2,1]-xsum[2,2])/sqrt(weighted.var(X.t.m,ww[tt==1][ww[tt==1]>0]))
- }
- xn[2,1] <- sum(tt[ww>0]==1)
- xn[2,2] <- sum(tt[ww>0]==0)
- xn[2,3] <- length(tt[ww>0])
- } else {
- xn[2,] <- xn[1,] #??
- m0 <- lm(xx~ tt)
- m1 <- lm(xx~ tt + as.factor(psc))
- tdat <- m1$model
- tdat$tt <- 1
- xsum[2,1] <- mean(predict(m1,newdata=tdat))
- tdat$tt <- 0
- xsum[2,2] <- mean(predict(m1,newdata=tdat))
- xsum[1,3] <- xsum[2,3] <- sd(xx)
- xsum[1,4] <- summary(m0)$coef[2,3]
- xsum[1,5] <- (xsum[1,1]-xsum[1,2])/sd(x1)
- xsum[2,4] <- summary(m1)$coef[2,3]
- xsum[2,5] <- (xsum[2,1]-xsum[2,2])/sd(x1)
- }
- list(xsum=xsum,xn=xn)
- }
- qoi.by.sub <- function(xx,tt,ww,qq){
- qbins <- max(qq,na.rm=TRUE)
- q.table <- matrix(0,5,qbins)
- qn <- matrix(0,3,qbins)
- matched <- ww!=0
- for (i in 1:qbins)
- {
- qi <- qq[matched]==i
- qx <- xx[matched][qi]
- qt <- tt[matched][qi]
- qw <- ww[matched][qi]
- if(sum(qt==1)<2|(sum(qt==0)<2)){
- if(sum(qt==1)<2){warning("Not enough treatment units in subclass ",i,call.=FALSE)}
- else if(sum(qt==0)<2){warning("Not enough control units in subclass ",i,call.=FALSE)}
- }
- qoi.i <- qoi(qx,qt,qw)
- q.table[,i] <- as.numeric(qoi.i$xsum[1,])
- qn[,i] <- as.numeric(qoi.i$xn[1,])
- }
- q.table <- as.data.frame(q.table)
- qn <- as.data.frame(qn)
- names(q.table) <- names(qn) <- paste("Subclass",1:qbins)
- row.names(q.table) <- names(qoi.i$xsum)
- row.names(qn) <- c("Treated","Control","Total")
- list(q.table=q.table,qn=qn)
- }
- pscore <- x$data[,"pscore"]
- treat <- eval(x$treat,x$data)
- names(treat) <- names(pscore)
- cat("\nAssignment model specification:\n", deparse(x$call),"\n\n",sep = "")
- if(identical(eval(x$call$exact),TRUE)) #for exact matching
- {
- cat("\nSample sizes for full and exactly matched data:\n\n")
- xqoi <- qoi(pscore, treat, x$psweights)
- print.data.frame(xqoi$xn, digits = digits)
- cat("\n\n")
- } else {
- if(is.null(x$match.matrix) & !identical(eval(x$call$full),TRUE))
- {
- cat("Summary of propensity score for full sample:\n\n")
- xqoi <- qoi(pscore, treat, x$psweights)
- print.data.frame(xqoi$xsum[1,], digits = digits)
- cat("\nSample sizes:\n\n")
- print.data.frame(xqoi$xn[1,], digits = digits)
- } else if(identical(eval(x$call$full),TRUE)){
- cat("\nSummary of propensity score for full and matched samples:\n\n")
- xqoi <- qoi(pscore, treat, x$psweights,
- psc = x$data$psclass, full = T)
- print.data.frame(xqoi$xsum, digits = digits)
- cat("\nSample sizes:\n\n")
- print.data.frame(xqoi$xn, digits = digits)
- } else {
- cat("\nSummary of propensity score for full and matched samples:\n\n")
- xqoi <- qoi(pscore, treat, x$psweights)
- print.data.frame(xqoi$xsum, digits = digits)
- cat("\nSample sizes:\n\n")
- print.data.frame(xqoi$xn, digits = digits)
- }
- cat("\n\n")
- if(!is.null(x$psclass) & !identical(eval(x$call$full),TRUE)) {
- qqoi <- qoi.by.sub(pscore, treat, x$psweights, x$psclass)
- cat("\nSummary of propensity score by subclasses:\n\n")
- print.data.frame(qqoi$q.table, digits = digits)
- cat("\nSample sizes by subclasses:\n\n")
- print.data.frame(qqoi$qn, digits = digits)
- cat("\n\n")
- invisible(x)
- }
- }
- }
+print.matchit <- function(x, digits = getOption("digits"), ...){
+ cat("\nCall: ", deparse(x$call), sep="\n")
+ cat("\nSample sizes:\n")
+
+ #if(any(x$weights>0))
+ # nn <- rbind(table(x$treat),
+ # table(x$weights>0, x$treat),
+ # c(0,0))
+ #else
+ # nn <- rbind(table(x$treat),
+ # table(x$weights>0,x$treat)[2:1,])
+
+ nn <- matrix(0, ncol=2, nrow=4)
+ nn[1,] <- c(sum(x$treat==0), sum(x$treat==1))
+ nn[2,] <- c(sum(x$treat==0 & x$weights>0), sum(x$treat==1 & x$weights>0))
+ nn[3,] <- c(sum(x$treat==0 & x$weights==0 & x$discarded==0), sum(x$treat==1 & x$weights==0 & x$discarded==0))
+ nn[4,] <- c(sum(x$treat==0 & x$weights==0 & x$discarded==1), sum(x$treat==1 & x$weights==0 & x$discarded==1))
+
+ dimnames(nn) <- list(c("All","Matched","Unmatched","Discarded"),
+ c("Control","Treated"))
+ print.table(nn, ...)
+ invisible(x)
+ cat("\n")
+}
diff --git a/R/print.matchit.exact.R b/R/print.matchit.exact.R
new file mode 100644
index 0000000..cfba1bc
--- /dev/null
+++ b/R/print.matchit.exact.R
@@ -0,0 +1,16 @@
+print.matchit.exact <- function(x, digits = getOption("digits"), ...){
+ cat("\nCall: ", deparse(x$call), sep = "\n")
+ cat("\nExact Subclasses: ", max(x$subclass, na.rm=T),"\n",sep="")
+ cat("\nSample sizes:\n")
+ ntab <- table(factor(!is.na(x$subclass),
+ levels=c("TRUE","FALSE")),
+ x$treat)
+ nn <- rbind(table(x$treat),
+ ntab[c("TRUE","FALSE"),])
+ dimnames(nn) <- list(c("All","Matched","Unmatched"),
+ c("Control","Treated"))
+ print.table(nn, ...)
+ invisible(x)
+ cat("\n")
+}
+
diff --git a/R/print.matchit.full.R b/R/print.matchit.full.R
new file mode 100644
index 0000000..32c448a
--- /dev/null
+++ b/R/print.matchit.full.R
@@ -0,0 +1,17 @@
+print.matchit.full <- function(x, digits = getOption("digits"), ...){
+ cat("\nCall: ", deparse(x$call), sep = "\n")
+ cat("\nSample sizes:\n")
+
+ if (any(x$weights>0))
+ nn <- rbind(table(x$treat),
+ table(x$weights>0, x$treat),
+ c(0,0))
+ else
+ nn <- rbind(table(x$treat),
+ table(x$weights>0,x$treat)[2:1,])
+ dimnames(nn) <- list(c("All","Matched","Discarded"),
+ c("Control","Treated"))
+ print.table(nn, ...)
+ invisible(x)
+ cat("\n")
+}
diff --git a/R/print.matchit.subclass.R b/R/print.matchit.subclass.R
new file mode 100644
index 0000000..53e23ac
--- /dev/null
+++ b/R/print.matchit.subclass.R
@@ -0,0 +1,12 @@
+print.matchit.subclass <- function(x, digits = getOption("digits"), ...){
+ cat("\nCall: ", deparse(x$call), sep = "\n")
+ cat("\nSample sizes by subclasses:\n\n")
+ nsub <- table(x$subclass,x$treat)
+ nn <- rbind(table(x$treat),nsub)
+ dimnames(nn) <-
+ list(c("All",paste("Subclass",dimnames(nsub)[[1]])),
+ c("Control","Treated"))
+ print.table(nn, ...)
+ invisible(x)
+ cat("\n")
+}
diff --git a/R/print.neyman.R b/R/print.neyman.R
deleted file mode 100644
index aa50268..0000000
--- a/R/print.neyman.R
+++ /dev/null
@@ -1,9 +0,0 @@
-print.neyman<-function(x, digits=max(3, getOption("digits") -3), ...){
- print.res<-cbind(x$ate, x$se)
- colnames(print.res)<-c("ATE", "SD")
- print.res <- print.res[1,]
-
- cat("\n Average Treatment Effect:\n \n")
- print.matrix(print.res, digits=digits, ...)
- cat("\n")
-}
diff --git a/R/print.summary.matchit.R b/R/print.summary.matchit.R
index 58ad985..8b0803f 100644
--- a/R/print.summary.matchit.R
+++ b/R/print.summary.matchit.R
@@ -1,61 +1,26 @@
-print.summary.matchit <- function(x, digits = max(3, getOption("digits") - 3), ...){
- verbose <- x$verbose
+print.summary.matchit <- function(x, digits = max(3,
+ getOption("digits") - 3), ...){
+
sum.all <- x$sum.all
sum.matched <- x$sum.matched
q.table <- x$q.table
- xn <- x$xn
- sig <- x$sig
- cat("\nAssignment model specification:\n", deparse(x$call),"\n\n",sep = "")
- if(verbose){
- cat("Summary of covariates and interactions for all data:\n\n")
- } else{
- cat("Summary of covariates for all data:\n\n")
- }
- print(sum.all, digits=digits)
+ xn <- x$nn
+ cat("\nCall:", deparse(x$call), sep = "\n")
+ cat("\nSummary of balance for all data:\n")
+ print.data.frame(round(sum.all,digits))
cat("\n")
+
xs1 <- sum.matched
cc <- row.names(sum.all)
- if(!is.null(x$match.matrix) | identical(eval(x$call$full),TRUE))
+ if(!is.null(x$sum.matched) | identical(eval(x$call$method),"All"))
{
- if(verbose){
- cat("Summary of covariates and interactions for matched data:\n\n")
- } else {
- cat("Summary of covariates for matched data:\n\n")
- }
- print(xs1, digits=digits)
- cat("\nSample sizes:\n\n")
- print.data.frame(xn, digits=digits)
- cat("\n")
- cat("Problematic covariates: ")
- xs2 <- xs1[1:(nrow(xs1)-1),]
- cat(row.names(xs2)[!is.na(xs2[,4]) & abs(xs2[,4])>sig])
+ cat("\nSummary of balance for matched data:\n")
+ print.data.frame(round(xs1,digits))
+ cat("\nPercent Balance Improvement:\n")
+ print.data.frame(round(x$reduction,digits))
+ cat("\nSample sizes:\n")
+ print.table(xn, digits=digits)
cat("\n")
}
- if(!is.null(x$psclass) & !identical(eval(x$call$full),TRUE))
- {
- if(identical(eval(x$call$exact),TRUE)){
- cat("\nSample sizes for full and exactly matched data:\n\n")
- print(xn,digits = digits)
- cat("\nSample sizes by covariates:\n\n")
- print(q.table, digits = digits)
- cat("\n")
- } else {
- print(q.table, digits = digits)
- cat("\nSample sizes by subclasses:\n\n")
- print.data.frame(x$qn, digits = digits)
- cat("\n")
- cat("Problematic covariates:\n")
- for(i in 1:dim(q.table)[3])
- {
- cat("Subclass ", i, ": ")
- xs1 <- q.table[cc,,i]
- cat(row.names(xs1)[!is.na(xs1[,4]) & abs(xs1[,4])>sig])
- cat("\n")
- }
- }
- }
-
- cat("Number of units discarded: ", sum(x$in.sample==FALSE))
- cat("\n\n")
invisible(x)
}
diff --git a/R/print.summary.matchit.exact.R b/R/print.summary.matchit.exact.R
new file mode 100644
index 0000000..b48cae9
--- /dev/null
+++ b/R/print.summary.matchit.exact.R
@@ -0,0 +1,17 @@
+print.summary.matchit.exact <- function(x, digits = max(3,
+ getOption("digits") - 3),
+ ...){
+ cat("\nCall:", deparse(x$call), sep = "\n")
+ cat("\nSample sizes:\n")
+ ntab <- table(factor(!is.na(x$subclass),
+ levels=c("TRUE","FALSE")), x$treat)
+ nn <- rbind(table(x$treat),
+ ntab[c("TRUE","FALSE"),])
+ dimnames(nn) <- list(c("All","Matched","Discarded"),
+ c("Control","Treated"))
+ print.table(nn,digits=digits)
+ cat("\nMatched sample sizes by subclass:\n")
+ print.data.frame(x$q.table, digits = digits)
+ cat("\n")
+ invisible(x)
+}
diff --git a/R/print.summary.matchit.subclass.R b/R/print.summary.matchit.subclass.R
new file mode 100644
index 0000000..eb29e1a
--- /dev/null
+++ b/R/print.summary.matchit.subclass.R
@@ -0,0 +1,20 @@
+print.summary.matchit.subclass <- function(x, digits = max(3,
+ getOption("digits") -
+ 3), ...){
+ sum.all <- x$sum.all
+ sum.matched <- x$sum.matched
+ q.table <- x$q.table
+ cat("\nCall:", deparse(x$call), sep = "\n")
+ cat("Summary of balance for all data:\n")
+ print.data.frame(round(sum.all,digits))
+ cat("\n")
+ cat("\nSummary of balance by subclasses:\n")
+ print.table(round(q.table, digits))
+ cat("\nSample sizes by subclasses:\n")
+ print.data.frame(x$qn, digits = digits)
+ cat("\nSummary of balance across subclasses\n")
+ print.data.frame(round(x$sum.subclass, digits))
+ cat("\nPercent Balance Improvement:\n")
+ print.data.frame(round(x$reduction,digits))
+ cat("\n")
+}
diff --git a/R/print.summary.neyman.R b/R/print.summary.neyman.R
deleted file mode 100644
index cffbc07..0000000
--- a/R/print.summary.neyman.R
+++ /dev/null
@@ -1,11 +0,0 @@
-print.summary.neyman<-function(x, digits=max(3, getOption("digits")-3),
- signif.stars=getOption("show.signif.stars"),
- ...){
- cat("\n Average Treatment Effect:\n")
- printCoefmat(x$results, digits=digits, signif.starts = signif.stars, ...)
- cat("\n")
-
- cat("\n Sample Sizes:\n")
- print.matrix(x$sample.size)
- cat("\n")
-}
diff --git a/R/qqsum.R b/R/qqsum.R
new file mode 100644
index 0000000..3d4f884
--- /dev/null
+++ b/R/qqsum.R
@@ -0,0 +1,18 @@
+## Function for QQ summary stats
+qqsum <- function (x, y){
+ sx <- sort(x)
+ sy <- sort(y)
+ lenx <- length(sx)
+ leny <- length(sy)
+ if (leny < lenx)
+ sx <- approx(1:lenx, sx, n = leny)$y
+ if (leny > lenx)
+ sy <- approx(1:leny, sy, n = lenx)$y
+ dxy <- abs(sx-sy)
+ meandiff <- mean(dxy)
+ meddiff <- as.numeric(median(dxy))
+ maxdiff <- max(dxy)
+ invisible(list(meandiff=meandiff,
+ meddiff = meddiff,
+ maxdiff = maxdiff))
+}
diff --git a/R/subclassify.R b/R/subclassify.R
deleted file mode 100644
index 061fd99..0000000
--- a/R/subclassify.R
+++ /dev/null
@@ -1,124 +0,0 @@
-subclassify <- function(formula,data,in.sample,pscore,nearest=TRUE,
- match.matrix,subclass=0,sub.by="treat",
- counter=TRUE, full = FALSE, full.options=list()){
-
- data <- eval(data,parent.frame())
- treata <- model.frame(formula,data)[,1,drop=FALSE]
- treat <- as.vector(treata[,1])
- names(treat) <- row.names(treata)
-
- if(full) { # full matching with propensity score
- if(counter){cat("Full Matching...")}
- full <- full.options
- if(!is.list(full.options)){
- warning("full.options must be a list; assuming defaults for full matching",call.=FALSE)
- }
- if(is.null(full$min.controls)){
- full$min.controls <- 0
- }
- if(is.null(full$max.controls)){
- full$max.controls <- Inf
- }
- if(is.null(full$omit.fraction)){
- full$omit.fraction <- NULL
- }
- if(is.null(full$tol)){
- full$tol <- 0.01
- }
- if(is.null(full$subclass.indices)){
- full$subclass.indices <- NULL
- }
- notin <- names(full.options)[which(!names(full.options)%in%c("min.controls","max.controls",
- "omit.fraction","omit.fraction",
- "tol", "subclass.indices"))]
- if(!is.null(notin)){
- warning(paste(notin,collapse=" "), " in full.options invalid and ignored for full matching",call.=FALSE)
- }
- n1 <- length(treat[treat==1])
- n0 <- length(treat[treat==0])
- p1 <- pscore[treat==1]
- p0 <- pscore[treat==0]
- distance <- matrix(0, ncol=n0, nrow=n1)
- rownames(distance) <- row.names(treata)[treat==1]
- colnames(distance) <- row.names(treata)[treat==0]
- for (i in 1:n1)
- distance[i,] <- abs(p1[i]-p0)
- full <- as.matrix(fullmatch(distance,subclass.indices = full$subclass.indices,
- min.controls = full$min.controls,
- max.controls = full$max.controls,
- omit.fraction = full$omit.fraction,
- tol = full$tol))
- psclass <- full[pmatch(row.names(treata), row.names(full)),]
- psclass <- as.numeric(as.factor(psclass))
- names(psclass) <- row.names(treata)
- q <- NULL
- if(counter){cat("Done\n")}
- }
- else if(any(subclass>0)){
- if(counter){cat("Subclassifying...")}
- n <- length(treat)
- if(nearest){
- match.matrix <- match.matrix[in.sample[treat==1],,drop=F]
- t.units <- row.names(match.matrix)[!is.na(match.matrix)]
-
- c.units <- na.omit(as.vector(as.matrix(match.matrix)))
- matched <-c(t.units,c.units)
- matched <- names(treat)%in%matched
- } else{
- matched <- rep(TRUE,n)}
- names(matched) <- names(treat)
- m1 <- matched[treat==1]
- m0 <- matched[treat==0]
- p1 <- pscore[treat==1][m1]
- p0 <- pscore[treat==0][m0]
-
- if(length(subclass)!=1 | (length(subclass)==1 &
- identical(subclass<1,TRUE))) {
- subclass <- sort(subclass)
- if (subclass[1]==0) subclass <- subclass[-1]
- if (subclass[length(subclass)]==1) subclass <- subclass[-length(subclass)]
- if(sub.by=="treat") {
- q <- c(0,quantile(p1,probs=c(subclass)),1)
- }
- else if(sub.by=="control") {
- q <- c(0,quantile(p0,probs=c(subclass)),1)
- }
- else if(sub.by=="all") {
- q <- c(0,quantile(pscore,probs=c(subclass)),1)
- }
- ## else {stop("Must specify a valid sub.by",call.=FALSE)
- ## }
- }
- else {
- ## if(subclass<=0){stop("Subclass must be a positive vector",call.=FALSE)}
- sprobs <- seq(0,1,length=(round(subclass)+1))
- sprobs <- sprobs[2:(length(sprobs)-1)]
- if(sub.by=="treat") {
- q <- c(0,quantile(p1,probs=sprobs,na.rm=TRUE),1)
- }
- else if(sub.by=="control") {
- q <- c(0,quantile(p0,probs=sprobs,na.rm=TRUE),1)
- }
- else if(sub.by=="all") {
- q <- c(0,quantile(pscore,probs=sprobs,na.rm=TRUE),1)
- }
- ## else {stop("Must specify a valid sub.by",call.=FALSE)}
- }
-
- qbins <- length(q)-1
- psclass <- rep(0,n)
- names(psclass) <- names(treat)
- for (i in 1:qbins){
- q1 <- q[i]
- q2 <- q[i+1]
- psclass <- psclass+i*as.numeric(pscore<q2 & pscore>=q1)}
-
- ## Make sure not to assign subclass to discarded units
- psclass[in.sample==0] <- 0
- psclass[!matched] <- 0
- if(counter){cat("Done\n")}
- }
- else {psclass <- NULL; q=NULL}
-
- return(list(psclass = psclass, q.cut = q))
-}
diff --git a/R/summary.matchit.R b/R/summary.matchit.R
index 473aa67..5e20d22 100644
--- a/R/summary.matchit.R
+++ b/R/summary.matchit.R
@@ -1,223 +1,69 @@
-summary.matchit <- function(object, verbose=F, sig=2, ...) {
- t.test.wtd <- function(x, treat, weights) {
- trt <- cov.wt(as.matrix(x[treat==1]), wt=weights[treat==1])
- mean.t <- trt$center
- cov.t <- trt$cov
- cont <- cov.wt(as.matrix(x[treat==0]), wt=weights[treat==0])
- mean.c <- cont$center
- cov.c <- cont$cov
- n.t <- sum(weights[treat==1])
- n.c <- sum(weights[treat==0])
- ratio.t <- cov.t/n.t
- ratio.c <- cov.c/n.c
- tt <- (mean.t-mean.c)/sqrt(ratio.t+ratio.c)
- }
- weighted.var <- function(x, w) {
- sum(w * (x - weighted.mean(x,w))^2)/(sum(w) - 1)}
- qoi <- function(xx,tt,ww, psc = NULL, full = F){
- xsum <- matrix(0,2,5)
- xsum <- as.data.frame(xsum)
- row.names(xsum) <- c("Full","Matched")
- names(xsum) <- c("Means Treated","Means Control","SD","T-stat","Bias")
- xn <- matrix(0,2,3)
- xn <- as.data.frame(xn)
- row.names(xn) <- c("Full","Matched")
- names(xn) <- c("Treated","Control","Total")
- xn[1,1] <- sum(tt==1)
- xn[1,2] <- sum(tt==0)
- xn[1,3] <- length(tt)
- x1 <- xx[tt==1]
- x0 <- xx[tt==0]
- xsum[1,1] <- mean(x1,na.rm=T)
- xsum[1,2] <- mean(x0,na.rm=T)
- if(!full){
- X.t.m <- xx[tt==1][ww[tt==1]>0]
- X.c.m <- xx[tt==0][ww[tt==0]>0]
- xsum[2,1] <- weighted.mean(X.t.m, ww[tt==1][ww[tt==1]>0])
- xsum[2,2] <- weighted.mean(X.c.m, ww[tt==0][ww[tt==0]>0])
- if(sum(tt==1)<2|(sum(tt==0)<2)){
- xsum[c(1,2),c(3,4,5)] <- NA
- } else {
- xsum[1,3] <- sd(xx,na.rm=T)
- if (sd(xx)>0)
- xsum[1,4] <- -1*t.test(xx~tt)$sta
- else
- xsum[1,4] <- NaN
- xsum[1,5] <- (mean(x1,na.rm=T)-mean(x0,na.rm=T))/sd(x1,na.rm=T)
- xsum[2,3] <- sqrt(weighted.var(xx[ww>0],ww[ww>0]))
- xsum[2,4] <- t.test.wtd(xx[ww>0],tt[ww>0],ww[ww>0])
- xsum[2,5] <- (xsum[2,1]-xsum[2,2])/sqrt(weighted.var(X.t.m,ww[tt==1][ww[tt==1]>0]))
- }
- xn[2,1] <- sum(tt[ww>0]==1)
- xn[2,2] <- sum(tt[ww>0]==0)
- xn[2,3] <- length(tt[ww>0])
- } else {
- xn[2,] <- xn[1,] #??
- m0 <- lm(xx~ tt)
- m1 <- lm(xx~ tt + as.factor(psc))
- tdat <- m1$model
- tdat$tt <- 1
- xsum[2,1] <- mean(predict(m1,newdata=tdat))
- tdat$tt <- 0
- xsum[2,2] <- mean(predict(m1,newdata=tdat))
- xsum[1,3] <- xsum[2,3] <- sd(xx)
- xsum[1,4] <- summary(m0)$coef[2,3]
- xsum[1,5] <- (xsum[1,1]-xsum[1,2])/sd(x1)
- xsum[2,4] <- summary(m1)$coef[2,3]
- xsum[2,5] <- (xsum[2,1]-xsum[2,2])/sd(x1)
- }
- list(xsum=xsum,xn=xn)
- }
- qoi.by.sub <- function(xx,tt,ww,qq){
- qbins <- max(qq,na.rm=TRUE)
- q.table <- matrix(0,5,qbins)
- qn <- matrix(0,3,qbins)
- matched <- ww!=0
- for (i in 1:qbins)
- {
- qi <- qq[matched]==i
- qx <- xx[matched][qi]
- qt <- tt[matched][qi]
- qw <- ww[matched][qi]
- if(sum(qt==1)<2|(sum(qt==0)<2)){
- if(sum(qt==1)<2){warning("Not enough treatment units in subclass ",i,call.=FALSE)}
- else if(sum(qt==0)<2){warning("Not enough control units in subclass ",i,call.=FALSE)}
- }
- qoi.i <- qoi(qx,qt,qw)
- q.table[,i] <- as.numeric(qoi.i$xsum[1,])
- qn[,i] <- as.numeric(qoi.i$xn[1,])
- }
- q.table <- as.data.frame(q.table)
- qn <- as.data.frame(qn)
- names(q.table) <- names(qn) <- paste("Subclass",1:qbins)
- row.names(q.table) <- names(qoi.i$xsum)
- row.names(qn) <- c("Treated","Control","Total")
- list(q.table=q.table,qn=qn)
- }
- #setting up covariates
- pscore <- object$data[,"pscore"]
- treat <- eval(object$treat,object$data)
- names(treat) <- names(pscore)
- data <- object$data
- weights <- object$psweights
- covariates <- as.data.frame(model.matrix(terms(object$formula),data)[,,drop=FALSE])
- if ("(Intercept)"%in%colnames(covariates))
- covariates <- covariates[,-match("(Intercept)", colnames(covariates)),drop=FALSE]
- mahvars <- eval(object$call$mahvars)
- exact <- eval(object$call$exact)
- subclass <- eval(object$call$subclass)
- if(is.null(subclass)){subclass <- 0}
- psclass <- object$psclass
- if (!is.null(mahvars)){
- w <- mahvars%in%names(covariates)
- if(sum(w)!=length(mahvars)){
- md <- as.data.frame(as.matrix(data[,mahvars[!w]]))
- names(md) <- mahvars[!w]
- row.names(md) <- row.names(covariates)
- covariates <- cbind(covariates,md)
- }
- }
- if (!identical(TRUE, exact) & !identical(FALSE, exact) & !is.null(exact)) {
- w <- eval(exact)%in%names(covariates)
- if(sum(w)!=length(exact)) {
- ed <- as.data.frame(as.matrix(data[,exact[!w]]))
- names(ed) <- exact[!w]
- row.names(ed) <- row.names(covariates)
- covariates <- cbind(covariates,ed)
- }
- }
- if(!identical(TRUE,exact)){
- covariates<- cbind(pscore,covariates)
- }
+summary.matchit <- function(object, interactions = FALSE, addlvariables = NULL, ...) {
- if(!identical(eval(object$call$full),TRUE)){
- aa <- apply(covariates,2,qoi,tt=treat,ww=weights)
- } else {
- aa <- apply(covariates,2,qoi,tt=treat,ww=weights,
- psc = object$data$psclass, full = T)
- }
- k <- length(aa)
- sum.all <- as.data.frame(matrix(0,k,5))
- sum.matched <- as.data.frame(matrix(0,k,6))
- row.names(sum.all) <- row.names(sum.matched) <- names(aa)
- names(sum.all) <- names(sum.matched) <- names(aa[[1]]$xsum)
- names(sum.matched) <- c(names(aa[[1]]$xsum),"Reduction")
+ XX <- cbind(distance=object$distance,object$X)
+ if (!is.null(addlvariables)) XX <- cbind(XX, addlvariables)
+
+ treat <- object$treat
+ weights <- object$weights
+ nam <- dimnames(XX)[[2]]
+ kk <- ncol(XX)
+
+ ## Summary Stats
+ aa <- apply(XX,2,qoi,tt=treat,ww=weights)
+ sum.all <- as.data.frame(matrix(0,kk,7))
+ sum.matched <- as.data.frame(matrix(0,kk,7))
+ row.names(sum.all) <- row.names(sum.matched) <- nam
+ names(sum.all) <- names(sum.matched) <- names(aa[[1]])
sum.all.int <- sum.matched.int <- NULL
- for(i in 1:k){
- sum.all[i,] <- aa[[i]]$xsum[1,]
- sum.matched[i,1:5] <- aa[[i]]$xsum[2,]
- sum.matched[i,6] <- (abs(sum.all[i,5])-abs(sum.matched[i,5]))>0
- if(verbose){
- for(j in i:k){
- x2 <- covariates[,i]*as.matrix(covariates[,j])
- if(!identical(eval(object$call$full),TRUE)){
- jqoi <- qoi(x2,tt=treat,ww=weights)
- } else {
- jqoi <- qoi(x2,tt=treat,ww=weights,
- psc = object$data$psclass, full = T)
- }
- sum.all.int <- rbind(sum.all.int,jqoi$xsum[1,])
- Reduction <- (abs(sum.all.int[nrow(sum.all.int),5])-abs(jqoi$xsum[2,5]))>0
- sum.matched.int <- rbind(sum.matched.int,cbind(jqoi$xsum[2,],Reduction))
+ for(i in 1:kk){
+ sum.all[i,] <- aa[[i]][1,]
+ sum.matched[i,] <- aa[[i]][2,]
+ if(interactions){
+ for(j in i:kk){
+ x2 <- XX[,i]*as.matrix(XX[,j])
+ jqoi <- qoi(x2,tt=treat,ww=weights)
+ sum.all.int <- rbind(sum.all.int,jqoi[1,])
+ sum.matched.int <- rbind(sum.matched.int,jqoi[2,])
row.names(sum.all.int)[nrow(sum.all.int)] <-
row.names(sum.matched.int)[nrow(sum.matched.int)] <-
- paste(names(covariates)[i],names(covariates)[j],sep="x")
+ paste(nam[i],nam[j],sep="x")
}
}
}
xn <- aa[[1]]$xn
sum.all <- rbind(sum.all,sum.all.int)
sum.matched <- rbind(sum.matched,sum.matched.int)
-
- #now subclassification
- if(!identical(subclass,0) & !identical(eval(object$call$full),TRUE))
- {
- qbins <- max(psclass,na.rm=TRUE)
- if(verbose){
- q.table <- array(0,dim=c(k+sum(1:k),5,qbins))
- ii <- 0
- nn <- NULL
- } else {
- q.table <- array(0,dim=c(k,5,qbins))
- }
- aa <- apply(covariates,2,qoi.by.sub,tt=treat,ww=weights,
- qq=object$psclass)
- for(i in 1:k){
- if(!verbose){
- q.table[i,,] <- as.matrix(aa[[i]]$q.table)
- nn <- names(aa)
- } else {
- ii <- ii + 1
- q.table[ii,,] <- as.matrix(aa[[i]]$q.table)
- nn <- c(nn,names(aa)[i])
- for(j in i:k){
- ii <- ii + 1
- x2 <- covariates[,i]*as.matrix(covariates[,j])
- q.table[ii,,] <- as.matrix(qoi.by.sub(x2,tt=treat,ww=weights,qq=object$psclass)$q.table)
- nn <- c(nn,paste(names(covariates)[i],names(covariates)[j],sep="x"))
- }
- }
- }
- qn <- aa[[1]]$qn
- dimnames(q.table) <- list(nn,row.names(aa[[i]]$q.table),paste("Subclass",1:qbins))
- } else {
- q.table <- NULL; q.bias <- NULL; qn <- NULL
- }
- if(identical(exact,TRUE)){
- qbins <- max(object$psclass,na.rm=TRUE)
- q.table <- as.data.frame(matrix(0,qbins,k+3))
- names(q.table) <- c(names(covariates),"Treated","Control","Total")
- for(i in 1:qbins){
- qi <- object$psclass==i
- q.table[i,] <- c(as.numeric(covariates[qi,,drop=F][1,]), sum(treat[qi]==1), sum(treat[qi]==0), length(treat[qi]))
- }
+
+ ## Imbalance Reduction
+ stat0 <- abs(cbind(sum.all[,2]-sum.all[,1],
+ sum.all[,5:7]))
+ stat1 <- abs(cbind(sum.matched[,2]-sum.matched[,1],
+ sum.matched[,5:7]))
+ reduction <- as.data.frame(100*(stat0-stat1)/stat0)
+ if(sum(stat0==0 & stat1==0, na.rm=T)>0){
+ reduction[stat0==0 & stat1==0] <- 0
}
- ans <- list(sum.all = sum.all, sum.matched = sum.matched,
- q.table=q.table, call=object$call, verbose=verbose,
- xn = xn, match.matrix = object$match.matrix, sig = sig,
- psclass=object$psclass, in.sample = object$in.sample,
- qn = qn)
- class(ans) <- "summary.matchit"
- ans
-}
+ if(sum(stat0==0 & stat1>0,na.rm=T)>0){
+ reduction[stat0==0 & stat1>0] <- -Inf
+ }
+ names(reduction) <- c("Mean and Std. Bias", "QQ Med","QQ Mean", "QQ Max")
+
+ ## Sample sizes
+ nn <- matrix(0, ncol=2, nrow=4)
+ nn[1,] <- c(sum(object$treat==0), sum(object$treat==1))
+ nn[2,] <- c(sum(object$treat==0 & object$weights>0), sum(object$treat==1 & object$weights>0))
+ nn[3,] <- c(sum(object$treat==0 & object$weights==0 & object$discarded==0), sum(object$treat==1 & object$weights==0 & object$discarded==0))
+ nn[4,] <- c(sum(object$treat==0 & object$weights==0 & object$discarded==1), sum(object$treat==1 & object$weights==0 & object$discarded==1))
+
+ dimnames(nn) <- list(c("All","Matched","Unmatched","Discarded"),
+ c("Control","Treated"))
+
+ #nn <- rbind(table(object$treat),
+ # table(object$weights!=0,object$treat)[2:1,])
+ ## output
+ res <- list(call=object$call, nn = nn, sum.all = sum.all, sum.matched = sum.matched,
+ reduction = reduction)
+ class(res) <- "summary.matchit"
+ return(res)
+}
diff --git a/R/summary.matchit.exact.R b/R/summary.matchit.exact.R
new file mode 100644
index 0000000..ed94a96
--- /dev/null
+++ b/R/summary.matchit.exact.R
@@ -0,0 +1,29 @@
+summary.matchit.exact <- function(object, covariates = FALSE, ...) {
+ XX <- object$X
+ treat <- object$treat
+ qbins <- max(object$subclass,na.rm=TRUE)
+ if(!covariates){
+ q.table <- as.data.frame(matrix(0,qbins,3))
+ names(q.table) <- c("Treated","Control","Total")
+ for(i in 1:qbins){
+ qi <- object$subclass==i
+ q.table[i,] <- c(sum(treat[qi]==1, na.rm=T), sum(treat[qi]==0, na.rm=T),
+ length(treat[qi & !is.na(qi)]))
+ }
+ } else {
+ kk <- ncol(XX)
+ q.table <- as.data.frame(matrix(0,qbins,kk+3))
+ names(q.table) <- c("Treated","Control","Total",dimnames(XX)[[2]])
+ for(i in 1:qbins){
+ qi <- object$subclass==i & !is.na(object$subclass==i)
+ q.table[i,] <- c(sum(treat[qi]==1, na.rm=T), sum(treat[qi]==0, na.rm=T),
+ length(treat[qi & !is.na(qi)]),as.numeric(XX[qi,,drop=F][1,]))
+ }
+ }
+
+ ## output
+ res <- list(q.table = q.table, subclass = object$subclass,
+ treat = object$treat, call = object$call)
+ class(res) <- c("summary.matchit.exact", "summary.matchit")
+ return(res)
+}
diff --git a/R/summary.matchit.full.R b/R/summary.matchit.full.R
new file mode 100644
index 0000000..e810aea
--- /dev/null
+++ b/R/summary.matchit.full.R
@@ -0,0 +1,70 @@
+summary.matchit.full <- function(object, interactions = FALSE, addlvariables = NULL, numdraws = 5000, ...) {
+
+ XX <- cbind(distance=object$distance,object$X)
+ if (!is.null(addlvariables)) XX <- cbind(XX, addlvariables)
+
+ treat <- object$treat
+ weights <- object$weights
+ nam <- dimnames(XX)[[2]]
+ kk <- ncol(XX)
+
+ ## Get samples of T and C units to send to qqplot
+ t.plot <- sample(names(treat)[treat==1], numdraws/2, replace=TRUE, prob=weights[treat==1])
+ c.plot <- sample(names(treat)[treat==0], numdraws/2, replace=TRUE, prob=weights[treat==0])
+
+ ## Summary Stats
+ aa <- apply(XX,2,qoi,tt=treat,ww=weights, t.plot=t.plot, c.plot=c.plot)
+ sum.all <- as.data.frame(matrix(0,kk,7))
+ sum.matched <- as.data.frame(matrix(0,kk,7))
+ row.names(sum.all) <- row.names(sum.matched) <- nam
+ names(sum.all) <- names(sum.matched) <- names(aa[[1]])
+ sum.all.int <- sum.matched.int <- NULL
+ for(i in 1:kk){
+ sum.all[i,] <- aa[[i]][1,]
+ sum.matched[i,] <- aa[[i]][2,]
+ if(interactions){
+ for(j in i:kk){
+ x2 <- XX[,i]*as.matrix(XX[,j])
+ jqoi <- qoi(x2,tt=treat,ww=weights, t.plot=t.plot, c.plot=c.plot)
+ sum.all.int <- rbind(sum.all.int,jqoi[1,])
+ sum.matched.int <- rbind(sum.matched.int,jqoi[2,])
+ row.names(sum.all.int)[nrow(sum.all.int)] <-
+ row.names(sum.matched.int)[nrow(sum.matched.int)] <-
+ paste(nam[i],nam[j],sep="x")
+ }
+ }
+ }
+ xn <- aa[[1]]$xn
+ sum.all <- rbind(sum.all,sum.all.int)
+ sum.matched <- rbind(sum.matched,sum.matched.int)
+
+ ## Imbalance Reduction
+ stat0 <- abs(cbind(sum.all[,2]-sum.all[,1],
+ sum.all[,5:7]))
+ stat1 <- abs(cbind(sum.matched[,2]-sum.matched[,1],
+ sum.matched[,5:7]))
+ reduction <- as.data.frame(100*(stat0-stat1)/stat0)
+ if(sum(stat0==0 & stat1==0, na.rm=T)>0){
+ reduction[stat0==0 & stat1==0] <- 0
+ }
+ if(sum(stat0==0 & stat1>0,na.rm=T)>0){
+ reduction[stat0==0 & stat1>0] <- -Inf
+ }
+ names(reduction) <- c("Mean and Std. Bias", "QQ Med","QQ Mean", "QQ Max")
+
+ ## Sample sizes
+ nn <- matrix(0, ncol=2, nrow=4)
+ nn[1,] <- c(sum(object$treat==0), sum(object$treat==1))
+ nn[2,] <- c(sum(object$treat==0 & object$weights>0), sum(object$treat==1 & object$weights>0))
+ nn[3,] <- c(sum(object$treat==0 & object$weights==0 & object$discarded==0), sum(object$treat==1 & object$weights==0 & object$discarded==0))
+ nn[4,] <- c(sum(object$treat==0 & object$weights==0 & object$discarded==1), sum(object$treat==1 & object$weights==0 & object$discarded==1))
+
+ dimnames(nn) <- list(c("All","Matched","Unmatched","Discarded"),
+ c("Control","Treated"))
+
+ ## output
+ res <- list(call=object$call, nn = nn, sum.all = sum.all, sum.matched = sum.matched,
+ reduction = reduction)
+ class(res) <- c("summary.matchit.full", "summary.matchit")
+ return(res)
+}
diff --git a/R/summary.matchit.subclass.R b/R/summary.matchit.subclass.R
new file mode 100644
index 0000000..7ca31a8
--- /dev/null
+++ b/R/summary.matchit.subclass.R
@@ -0,0 +1,110 @@
+summary.matchit.subclass <- function(object, interactions = FALSE, addlvariables=NULL,
+ ...) {
+
+ XX <- cbind(distance=object$distance,object$X)
+ if (!is.null(addlvariables)) XX <- cbind(XX, addlvariables)
+
+ treat <- object$treat
+ weights <- object$weights
+ nam <- dimnames(XX)[[2]]
+ kk <- ncol(XX)
+
+ ## Summary Stats
+ aa <- apply(XX,2,qoi,tt=treat,ww=as.numeric(weights!=0))
+ sum.all <- as.data.frame(matrix(0,kk,7))
+ sum.matched <- as.data.frame(matrix(0,kk,7))
+ row.names(sum.all) <- row.names(sum.matched) <- nam
+ names(sum.all) <- names(sum.matched) <- names(aa[[1]])
+ sum.all.int <- sum.matched.int <- NULL
+ for(i in 1:kk){
+ sum.all[i,] <- aa[[i]][1,]
+ sum.matched[i,] <- aa[[i]][2,]
+ if(interactions){
+ for(j in i:kk){
+ x2 <- XX[,i]*as.matrix(XX[,j])
+ jqoi <- qoi(x2,tt=treat,ww=as.numeric(weights!=0))
+ sum.all.int <- rbind(sum.all.int,jqoi[1,])
+ sum.matched.int <- rbind(sum.matched.int,jqoi[2,])
+ row.names(sum.all.int)[nrow(sum.all.int)] <-
+ row.names(sum.matched.int)[nrow(sum.matched.int)] <-
+ paste(nam[i],nam[j],sep="x")
+ }
+ }
+ }
+ xn <- aa[[1]]$xn
+ sum.all <- rbind(sum.all,sum.all.int)
+ sum.matched <- rbind(sum.matched,sum.matched.int)
+
+ ## By Subclass
+ qbins <- max(object$subclass,na.rm=TRUE)
+ if(interactions){
+ q.table <- array(0,dim=c(kk+sum(1:kk),7,qbins))
+ ii <- 0
+ nn <- NULL
+ } else {
+ q.table <- array(0,dim=c(kk,7,qbins))
+ }
+ aa <- apply(XX,2,qoi.by.sub,tt=treat,ww=weights,
+ qq=object$subclass)
+ for(i in 1:kk){
+ if(!interactions){
+ q.table[i,,] <- as.matrix(aa[[i]]$q.table)
+ nn <- names(aa)
+ } else {
+ ii <- ii + 1
+ q.table[ii,,] <- as.matrix(aa[[i]]$q.table)
+ nn <- c(nn,names(aa)[i])
+ for(j in i:kk){
+ ii <- ii + 1
+ x2 <- XX[,i]*as.matrix(XX[,j])
+ q.table[ii,,] <- as.matrix(qoi.by.sub(x2,tt=treat,ww=weights,qq=object$subclass)$q.table)
+ nn <- c(nn,paste(nam[i],nam[j],sep="x"))
+ }
+ }
+ }
+ qn <- aa[[1]]$qn
+ dimnames(q.table) <- list(nn,row.names(aa[[i]]$q.table),paste("Subclass",1:qbins))
+
+ ## Aggregate Subclass
+ if(is.null(object$call$sub.by)){
+ object$call$sub.by <- "treat"
+ }
+ if(object$call$sub.by=="treat") {
+ wsub <- qn[1,]/sum(qn[1,])
+ } else if(sub.by=="control") {
+ wsub <- qn[2,]/sum(qn[2,])
+ } else if(sub.by=="all") {
+ wsub <- qn[3,]/sum(qn[3,])
+ }
+ sum.subclass <- sum.all
+ for(i in 1:kk){
+ for(j in 1:7){
+ if(j==3) {
+ sum.subclass[i,j] <- sqrt(sum((wsub^2)*(q.table[i,j,]^2)))
+ } else {
+ sum.subclass[i,j] <- sum(wsub*q.table[i,j,])
+ }
+ }
+ }
+
+ ## Imbalance Reduction
+ stat0 <- abs(cbind(sum.all[,2]-sum.all[,1],
+ sum.all[,5:7]))
+ stat1 <- abs(cbind(sum.subclass[,2]-sum.subclass[,1],
+ sum.subclass[,5:7]))
+ reduction <- as.data.frame(100*(stat0-stat1)/stat0)
+ if(sum(stat0==0 & stat1==0, na.rm=T)>0){
+ reduction[stat0==0 & stat1==0] <- 0
+ }
+ if(sum(stat0==0 & stat1>0,na.rm=T)>0){
+ reduction[stat0==0 & stat1>0] <- -Inf
+ }
+ names(reduction) <- c("Mean and Std. Bias", "QQ Med","QQ Mean", "QQ Max")
+
+ ## output
+ res <- list(call=object$call, sum.all = sum.all, sum.matched = sum.matched,
+ sum.subclass = sum.subclass, reduction = reduction,
+ qn = qn, q.table = q.table)
+ class(res) <- c("summary.matchit.subclass", "summary.matchit")
+ return(res)
+}
diff --git a/R/summary.neyman.R b/R/summary.neyman.R
deleted file mode 100644
index b8be27a..0000000
--- a/R/summary.neyman.R
+++ /dev/null
@@ -1,14 +0,0 @@
-summary.neyman<-function(object,...){
- tval<-object$ate/object$se
- res<-cbind(object$ate, object$se, tval, 2*pnorm(abs(tval), lower.tail=FALSE))
- colnames(res)<-c("Estimate", "Std. Error", "z value", "Pr(>|z|)")
- rownames(res)[1]<-c("Overall")
-
- sampsize <- cbind(object$Ntrt, object$Ncont)
- colnames(sampsize) <- c("Ntrt", "Ncont")
- rownames(sampsize) <- rownames(res)
-
- output <- list(results=res, sample.size=sampsize)
- class(output)<-"summary.neyman"
- output
-}
diff --git a/R/weights.matrix.R b/R/weights.matrix.R
new file mode 100644
index 0000000..e8891b2
--- /dev/null
+++ b/R/weights.matrix.R
@@ -0,0 +1,47 @@
+weights.matrix <- function(match.matrix, treat, discarded){
+
+ n <- length(treat)
+ labels <- names(treat)
+ tlabels <- labels[treat==1]
+ clabels <- labels[treat==0]
+
+ in.sample <- !discarded
+ names(in.sample) <- labels
+
+ match.matrix <- match.matrix[tlabels,,drop=F][in.sample[tlabels],,drop=F]
+ num.matches <- dim(match.matrix)[2]-apply(as.matrix(match.matrix),
+ 1, function(x){sum(is.na(x))})
+ names(num.matches) <- tlabels[in.sample[tlabels]]
+
+ t.units <- row.names(match.matrix)[num.matches>0]
+ c.units <- na.omit(as.vector(as.matrix(match.matrix)))
+
+ weights <- rep(0,length(treat))
+ names(weights) <- labels
+ weights[t.units] <- 1
+
+ for (cont in clabels) {
+ treats <- na.omit(row.names(match.matrix)[cont==match.matrix[,1]])
+ if (dim(match.matrix)[2]>1)
+ for (j in 2:dim(match.matrix)[2])
+ treats <- c(na.omit(row.names(match.matrix)[cont==match.matrix[,j]]),treats)
+ for (k in unique(treats))
+ weights[cont] <- weights[cont] + 1/num.matches[k]
+ }
+
+ if (sum(weights[clabels])==0)
+ weights[clabels] <- rep(0, length(weights[clabels]))
+ else
+ weights[clabels] <- weights[clabels]*length(unique(c.units))/sum(weights[clabels])
+
+ weights[!in.sample] <- 0
+ if (sum(weights)==0)
+ stop("No units were matched")
+ else if (sum(weights[tlabels])==0)
+ stop("No treated units were matched")
+ else if (sum(weights[clabels])==0)
+ stop("No control units were matched")
+
+ return(weights)
+}
+
diff --git a/R/weights.subclass.R b/R/weights.subclass.R
new file mode 100644
index 0000000..8780cbf
--- /dev/null
+++ b/R/weights.subclass.R
@@ -0,0 +1,43 @@
+weights.subclass <- function(psclass, treat) {
+
+ ttt <- treat[!is.na(psclass)]
+ classes <- na.omit(psclass)
+
+ n <- length(ttt)
+ labels <- names(ttt)
+ tlabels <- labels[ttt==1]
+ clabels <- labels[ttt==0]
+
+ weights <- rep(0, n)
+ names(weights) <- labels
+ weights[tlabels] <- 1
+
+ for(j in unique(classes)){
+ qn0 <- sum(ttt==0 & classes==j)
+ qn1 <- sum(ttt==1 & classes==j)
+ weights[ttt==0 & classes==j] <- qn1/qn0
+ }
+ if (sum(weights[ttt==0])==0)
+ weights[ttt==0] <- rep(0, length(weights[clabels]))
+ else {
+ ## Number of C units that were matched to at least 1 T
+ num.cs <- sum(weights[clabels] > 0)
+ weights[clabels] <- weights[clabels]*num.cs/sum(weights[clabels])
+ }
+
+ if (any(is.na(psclass))) {
+ tmp <- rep(0, sum(is.na(psclass)))
+ names(tmp) <- names(treat[is.na(psclass)])
+ weights <- c(weights, tmp)[names(treat)]
+ }
+
+ if (sum(weights)==0)
+ stop("No units were matched")
+ else if (sum(weights[tlabels])==0)
+ stop("No treated units were matched")
+ else if (sum(weights[clabels])==0)
+ stop("No control units were matched")
+
+ return(weights)
+}
+
diff --git a/data/incomedata.tab b/data/incomedata.tab
deleted file mode 100644
index 64dd46c..0000000
--- a/data/incomedata.tab
+++ /dev/null
@@ -1,201 +0,0 @@
-"treat" "income74" "income75" "male" "educ" "income78"
-1 1441 2488 1 11 3561
-1 1869 1703 0 9 2579
-1 1516 2258 1 7 3714
-1 1690 2093 0 10 2806
-1 2336 3105 1 13 3423
-1 2154 2426 0 9 2471
-1 1653 2006 0 9 3777
-1 2252 3262 0 12 2686
-1 1589 1695 0 11 3605
-1 1763 3223 0 12 2433
-1 1950 2913 0 8 3075
-1 2017 2166 1 11 2914
-1 2135 2900 1 10 3574
-1 2000 1858 0 13 2700
-1 2228 2592 1 12 3439
-1 2052 1975 1 12 2593
-1 2174 1452 1 10 3734
-1 2922 3112 1 13 2915
-1 1493 2374 1 15 2975
-1 1543 2219 1 14 2926
-1 1502 3039 1 12 3141
-1 2503 3374 1 11 2501
-1 1776 2440 0 10 2632
-1 1635 1721 1 12 3007
-1 2140 2279 0 12 2261
-1 2574 2378 1 7 2717
-1 1285 2321 1 10 4377
-1 1689 2327 1 9 3197
-1 2041 2799 1 7 3696
-1 2854 1988 0 9 2570
-1 2695 1105 1 13 2196
-1 2490 2053 1 11 3556
-1 1861 1990 1 13 2372
-1 1419 1300 1 11 2911
-1 1237 812 1 12 3602
-1 1925 2585 0 8 3125
-1 2321 2985 1 12 3263
-1 1893 3209 1 14 2643
-1 2908 2462 1 11 2464
-1 2204 2186 0 9 2606
-1 2011 1768 0 9 3112
-1 1883 2510 1 14 2856
-1 2281 1955 1 11 3553
-1 1283 2837 0 10 2782
-1 1681 2097 0 9 2651
-1 2370 2767 1 12 3460
-1 2041 1939 0 10 2902
-1 1769 1670 1 12 3427
-1 1589 2605 0 12 2320
-1 2432 1902 1 10 3104
-1 1583 2005 1 14 3277
-1 2320 2719 1 14 3169
-1 2034 2151 0 13 2051
-1 2099 1958 0 6 3049
-1 2616 2801 0 14 2832
-1 2049 2546 0 13 2937
-1 2544 1694 0 7 3061
-1 672 2341 0 14 2505
-1 1284 2741 0 11 3410
-1 2104 2399 0 10 3655
-1 1479 2128 1 10 2773
-1 2360 2582 1 10 3180
-1 1809 2350 0 9 2654
-1 1378 2093 1 10 3356
-1 1851 2296 1 14 2126
-1 2953 1796 0 13 3351
-1 2485 1692 0 9 2497
-1 2420 2222 0 9 3512
-1 2197 1839 1 11 3726
-1 2779 2448 0 11 2524
-1 1132 2116 1 9 2804
-1 1641 3345 1 11 2811
-1 1436 2188 0 10 2766
-1 2422 2097 0 14 2781
-1 2202 2251 0 12 3580
-1 1140 2018 1 14 3126
-1 2038 2106 0 8 3440
-1 2076 1941 1 12 2670
-1 1839 1290 0 12 3067
-1 1853 1800 0 11 1986
-1 2721 1568 0 10 3238
-1 2190 2231 0 9 3047
-1 3164 2588 0 13 3423
-1 2136 2304 0 11 2546
-1 3143 2774 0 10 2152
-1 2187 2496 1 8 2847
-1 1945 2259 1 14 3251
-1 1089 2017 0 13 3593
-1 1964 2234 1 9 4722
-1 1817 1685 0 9 3707
-1 1765 2393 0 12 2598
-1 2103 2611 1 9 3456
-1 1827 2130 0 8 2921
-1 1521 2960 1 7 2674
-1 2026 2426 1 10 2634
-1 1594 3443 1 7 2739
-1 2720 1741 0 10 2559
-1 2146 2509 1 14 2578
-1 1444 2091 0 9 3868
-1 2551 2230 0 10 3477
-0 2721 2126 0 10 2072
-0 2305 1725 1 11 2587
-0 1442 2435 0 9 2631
-0 1836 2866 0 11 3295
-0 2471 1876 1 9 2453
-0 2176 2267 0 12 2770
-0 1735 2628 1 11 1559
-0 1856 1771 0 10 2131
-0 1860 2679 1 10 2019
-0 1810 1889 0 13 2600
-0 2299 1837 1 10 1429
-0 2391 2793 0 11 2504
-0 1878 1761 0 10 3778
-0 2800 2567 0 10 2204
-0 1729 2727 1 13 2545
-0 2388 2036 1 9 2920
-0 1509 2882 1 8 2572
-0 1878 1765 0 9 2514
-0 1997 2881 0 10 1954
-0 2327 2182 0 6 1726
-0 1385 2557 0 10 3083
-0 2172 2266 1 10 2879
-0 2435 2715 0 11 2989
-0 979 3556 1 9 1859
-0 1904 2371 1 9 2522
-0 2817 2793 1 13 2760
-0 2641 1462 0 12 2599
-0 1915 2634 1 7 2519
-0 2108 2338 0 8 3242
-0 2920 2026 1 10 1681
-0 1599 2709 1 7 1949
-0 2042 1808 0 12 2864
-0 2739 1805 1 10 2639
-0 2398 1906 0 16 2156
-0 1674 2573 0 13 1819
-0 1809 2233 0 8 3175
-0 1871 1874 0 11 1819
-0 2030 2033 1 11 2607
-0 1129 2427 1 10 2402
-0 3068 2391 1 6 3093
-0 1679 1418 0 11 2612
-0 1472 2432 1 9 2804
-0 1792 3312 0 11 2316
-0 2220 1786 1 11 2999
-0 2263 1701 0 11 3088
-0 2414 1845 1 10 2745
-0 2233 1954 1 10 2695
-0 1152 2250 0 8 2566
-0 2083 1785 0 10 2588
-0 575 1968 0 6 2419
-0 2161 1969 1 6 2533
-0 2036 2178 0 9 2508
-0 1701 1993 1 12 2520
-0 2263 1640 0 10 2993
-0 2391 2347 1 9 3449
-0 1254 1579 1 13 2639
-0 2415 2403 1 7 3172
-0 1848 2482 0 13 3709
-0 3216 2329 0 14 1494
-0 2276 1493 1 13 2635
-0 1767 2513 0 12 3317
-0 2395 2826 0 9 1685
-0 1773 2060 0 11 2708
-0 2723 2016 0 8 2394
-0 2523 2664 1 9 1988
-0 1587 1837 0 9 2407
-0 2253 2117 0 13 2160
-0 1504 2267 0 11 3059
-0 1746 3184 1 12 2491
-0 1941 2292 1 7 2275
-0 2411 2679 1 14 3262
-0 3127 2180 1 13 2383
-0 1801 3165 0 12 2278
-0 2120 2393 0 9 1754
-0 2074 2602 0 7 2025
-0 1793 1595 1 8 2139
-0 2399 2331 0 12 2674
-0 2043 2454 0 10 2398
-0 2477 2171 0 10 2543
-0 2061 3127 1 9 1783
-0 2182 2148 0 9 3201
-0 2355 2819 0 10 2712
-0 2063 2427 1 11 3145
-0 2250 1879 1 6 1759
-0 1332 1859 1 12 2037
-0 1198 2314 0 9 1635
-0 1504 2069 0 12 2763
-0 2482 2873 0 9 2728
-0 1257 2179 0 9 1303
-0 1553 2339 0 13 1741
-0 2368 1731 0 14 2578
-0 2432 2622 1 11 1864
-0 1866 2699 1 8 2979
-0 1562 2001 0 10 2471
-0 1856 2778 0 8 2579
-0 1572 1882 1 8 1515
-0 2636 2742 0 10 2160
-0 1453 2282 1 15 2825
-0 1558 2190 0 13 3293
-0 1941 2395 1 13 2832
diff --git a/demo/00Index b/demo/00Index
index 2c474f2..5440f3e 100644
--- a/demo/00Index
+++ b/demo/00Index
@@ -1,3 +1,8 @@
-analysis Methods for analysis of outcome using matched samples
-diagnose Propensity score specification procedure
-lalonde Examples of matching methods, using National Supported Work demonstration data
+exact Demo of exact matching, using Lalonde dataset
+full Demo of full matching, using Lalonde dataset
+genetic Demo of genetic matching, using Lalonde dataset
+match.data Demo of obtaining matched data
+nearest Demos of nearest neighbor matching, using Lalonde dataset
+subclass Demo of subclassification, using Lalonde dataset
+optimal Demo of optimal matching, using Lalonde dataset
+analysis Demo of using Zelig with MatchIt for parametric causal inference after matching
diff --git a/demo/analysis.R b/demo/analysis.R
index 9351ca5..b4ab1d7 100644
--- a/demo/analysis.R
+++ b/demo/analysis.R
@@ -1,82 +1,113 @@
-# Examples of analyses, with and without Zelig
-# Without zelig
+###
+### Example 1: calculating the average treatment effect for the treated
+###
+## load the Lalonde data
data(lalonde)
+user.prompt()
+
+## load Zelig package: if not already installed, try install.package("Zelig")
+library(Zelig)
+user.prompt()
+
+## propensity score matching
+m.out1 <- matchit(treat ~ age + educ + black + hispan + nodegree + married + re74 + re75,
+ method = "nearest", data = lalonde)
+user.prompt()
+
+## fit the linear model to the control group controlling for propensity score and
+## other covariates
+z.out1 <- zelig(re78 ~ age + educ + black + hispan + nodegree + married + re74 + re75 +
+ distance, data = match.data(m.out1, "control"), model = "ls")
+user.prompt()
+
+## set the covariates to the covariates of matched treated units
+## use conditional prediction by setting cond = TRUE.
+x.out1 <- setx(z.out1, data = match.data(m.out1, "treat"), fn = NULL, cond = TRUE)
+user.prompt()
+
+## simulate quantities of interest
+s.out1 <- sim(z.out1, x = x.out1)
+user.prompt()
+
+## obtain a summary
+summary(s.out1)
+user.prompt()
+
+
+###
+### Example 2: calculating the average treatment effect for the entire sample
+###
+
+## fit the linear model to the treatment group controlling for propensity score and
+## other covariates
+z.out2 <- zelig(re78 ~ age + educ + black + hispan + nodegree + married + re74 + re75 +
+ distance, data = match.data(m.out1, "control"), model = "ls")
+user.prompt()
+
+## conducting the simulation procedure for the control group
+x.out2 <- setx(z.out2, data = match.data(m.out1, "control"), fn = NULL, cond = TRUE)
+user.prompt()
+
+s.out2 <- sim(z.out2, x = x.out2)
+user.prompt()
+
+## Note that Zelig calculates the difference between observed and
+## either predicted or expected values. This means that the treatment
+## effect for the control units is actually the effect of control
+## (observed control outcome minus the imputed outcome under treatment
+## from the model). Hence, to combine treatment effects just reverse
+## the signs of the estimated treatment effect of controls.
+ate.all <- c(s.out1$qi$ate.ev, -s.out2$qi$ate.ev)
+user.prompt()
+
+## some summaries
+## point estimate
+mean(ate.all)
+user.prompt()
+## standard error
+sd(ate.all)
+user.prompt()
+## 95% confidence interval
+quantile(ate.all, c(0.025, 0.975))
+user.prompt()
+
+
+###
+### Example 3: subclassification
+###
+
+## subclassification with 4 subclasses
+m.out2 <- matchit(treat ~ age + educ + black + hispan + nodegree + married + re74 + re75,
+ data = lalonde, method = "subclass", subclass = 4)
+user.prompt()
+
+## controlling only for the estimated prpensity score and lagged Y within each subclass
+## one can potentially control for more
+z.out3 <- zelig(re78 ~ re74 + re75 + distance, data = match.data(m.out2, "control"),
+ model = "ls", by = "subclass")
+user.prompt()
+
+## conducting simulations
+x.out3 <- setx(z.out3, data = match.data(m.out2, "treat"), fn = NULL, cond = TRUE)
+user.prompt()
+
+## for the demonstration purpose, we set the number of simulations to be 100
+s.out3 <- sim(z.out3, x = x.out3, num = 100)
+user.prompt()
+
+## overall results
+summary(s.out3)
+user.prompt()
+
+## summary for each subclass
+summary(s.out3, subset = 1)
+user.prompt()
+
+summary(s.out3, subset = 2)
+user.prompt()
+
+summary(s.out3, subset = 3)
+user.prompt()
-# First just neyman on matched sample
-print("Neyman estimates using a matched sample")
-match.out1 <- matchit(treat ~ age + educ + black + hispan + nodegree +
- married + re74 + re75, data = lalonde)
-neyman.out <- neyman(re78, match.out1)
-print(neyman.out)
-print(summary(neyman.out))
-silent <- readline("\nPress <return> to continue: ")
-
-# Neyman using propensity score subclasses
-print("Neyman estimates with propensity score subclassification")
-match.out1s <- matchit(treat ~ age + educ + black + hispan + nodegree
- + married + re74 + re75, data = lalonde, subclass=4)
-neyman.outs <- neyman(re78, match.out1s)
-print(neyman.outs)
-print(summary(neyman.outs))
-silent <- readline("\nPress <return> to continue: ")
-
-# Neyman using exact matching
-print("Neyman estimates using exact matching")
-match.out1e <- matchit(treat ~ black + hispan, data = lalonde, exact=TRUE)
-neyman.oute <- neyman(re78, match.out1e)
-print(neyman.oute)
-print(summary(neyman.oute))
-silent <- readline("\nPress <return> to continue: ")
-
-# Run linear regression on matched data: use match.out1
-print("Linear regression on matched data set")
-lm.1 <- lm(re78 ~ treat + pscore, data=match.data(match.out1))
-
-print(summary(lm.1)$coef[2,])
-silent <- readline("\nPress <return> to continue: ")
-
-# Zelig part from zelig documentation (match.R in demo directory)
-if ("Zelig"%in%.packages(all=T)) {
- library(Zelig)
-
- ## an example for propensity score matching
- print("Using Zelig for analysis")
- z.out1 <- zelig(re78 ~ pscore, data = match.data(match.out1,
- "control"), model = "ls")
- x.out1 <- setx(z.out1, data = match.data(match.out1, "treat"),
- fn = NULL, cond = TRUE)
- s.out1 <- sim(z.out1, x = x.out1)
- print(summary(s.out1))
- user.prompt()
-
- ## an example for subclassification
- print("Using Zelig for analysis, with propensity score subclassification")
- z.out2 <- zelig(re78 ~ pscore, data = match.data(match.out1s,
- "control"), model="ls", by="psclass")
- x.out2 <- setx(z.out2, data = match.data(match.out1s, "treat"),
- fn = NULL, cond = TRUE)
- s.out2 <- sim(z.out2, x = x.out2, num = 100)
- print(summary(s.out2)) # overall results
- print(summary(s.out2, subset = 1)) # subclass 1
- print(summary(s.out2, subset = 2)) # subclass 2
- print(summary(s.out2, subset = 3)) # subclass 3
- user.prompt()
-}
-if(!("Zelig"%in%.packages(all=T))){
- cat("Zelig is not installed. \nFor further illustrations of analyses,\nyou may install Zelig from \nhttp://gking.harvard.edu/zelig/\n")
-}
-
-# Full matching: if package installed
-if ("optmatch"%in%.packages(all=T)) {
-print("Analysis after full matching: using fixed effects")
-foo1 <- matchit(treat ~ age + educ + black + hispan + married +
- nodegree + re74 + re75, data=lalonde, full=T)
-m1 <- lm(re78~ treat + age + educ + black + hispan + married +
- nodegree + re74 + re75 + as.factor(psclass),
- data=foo1$data)
-print(summary(m1))
-}
-if(!("optmatch"%in%.packages(all=T)))
- cat("Optmatch is not installed. \nMore information is available from \nhttp://www.stat.lsa.umich.edu/~bbh/optmatch.html \n")
diff --git a/demo/diagnose.R b/demo/diagnose.R
deleted file mode 100644
index 4f63257..0000000
--- a/demo/diagnose.R
+++ /dev/null
@@ -1,35 +0,0 @@
-# Demo of propensity score specification diagnostics
-# Initial model
-data(lalonde)
-match.out1 <- matchit(treat ~ age + married + black + hispan +
- nodegree + educ + re74 + re75, data=lalonde,
- subclass=c(0, .5, 1))
-
-summary(match.out1, verbose=F)
-
-# Create more blocks if t-stat of pscore > 2.5
-match.out1 <- matchit(treat ~ age + educ + married + nodegree + hispan
- + black + re74 + re75, data=lalonde,
- subclass=c(0,.25, .5,.75,1))
-
-summary(match.out1, verbose=T)
-
-# Add terms that are significant in > 1 block
-match.out2 <- matchit(treat ~ age + educ + married + nodegree + hispan
- + black + re74 + re75 + I(educ*black) +
- I(black*re74) + I(age*married) + I(educ*married)
- + I(educ*nodegree) + I(married*nodegree) +
- I(married*black) + I(age*nodegree),
- data=lalonde, subclass=c(0,.5,1))
-
-summary(match.out2, verbose=F)
-
-match.out2 <- matchit(treat ~ age + educ + married + nodegree + hispan
- + black + re74 + re75 + I(educ*black) +
- I(black*re74) + I(age*married) + I(educ*married)
- + I(educ*nodegree) + I(married*nodegree) +
- I(married*black) + I(age*nodegree),
- data=lalonde, subclass=c(0,.25,.5,1))
-
-summary(match.out2, verbose=F)
-
diff --git a/demo/exact.R b/demo/exact.R
new file mode 100644
index 0000000..94dfd7e
--- /dev/null
+++ b/demo/exact.R
@@ -0,0 +1,21 @@
+###
+### An Example Script for Exact Matching
+###
+
+## laod the Lalonde data
+data(lalonde)
+user.prompt()
+
+## exact matching
+m.out <- matchit(treat ~ educ + black + hispan, data = lalonde,
+ method = "exact")
+user.prompt()
+
+## print a short summary
+print(m.out)
+user.prompt()
+
+## balance diagnostics through statistics
+summary(m.out, covariates = T)
+user.prompt()
+
diff --git a/demo/full.R b/demo/full.R
new file mode 100644
index 0000000..3b8f1c2
--- /dev/null
+++ b/demo/full.R
@@ -0,0 +1,24 @@
+###
+### An Example Script for Full Matching
+###
+
+## load the Lalonde data
+data(lalonde)
+user.prompt()
+
+## conduct full matching using the propensity score based on logistic regression
+m.out <- matchit(treat ~ age + educ + black + hispan + married +
+ nodegree + re74 + re75, data = lalonde,
+ method = "full", distance = "logit")
+user.prompt()
+
+## print a short summary
+print(m.out)
+user.prompt()
+
+## balance diagnostics through statistics
+summary(m.out)
+user.prompt()
+
+## balance diagnostics through graphics
+plot(m.out)
diff --git a/demo/genetic.R b/demo/genetic.R
new file mode 100644
index 0000000..67262b6
--- /dev/null
+++ b/demo/genetic.R
@@ -0,0 +1,18 @@
+####
+#### demo file for Genetic Matching
+####
+
+## loading the lalonde data
+data(lalonde)
+
+## using logistic propensity score as one of the covariates
+m.out <- matchit(treat ~ age + educ + black + hispan + married + nodegree + re74 + re75, data = lalonde, method = "genetic", distance = "logit")
+
+## printing a short summary
+print(m.out)
+
+## numerical balance diagonstics
+summary(m.out)
+
+## graphical balance diagnostics
+plot(m.out)
diff --git a/demo/lalonde.R b/demo/lalonde.R
deleted file mode 100644
index 3b143a0..0000000
--- a/demo/lalonde.R
+++ /dev/null
@@ -1,112 +0,0 @@
-# MatchIt Demo script with Lalonde data
-
-data(lalonde)
-
-#Exact matching
-foo1 <- matchit(treat ~ black + hispan, exact=TRUE, data=lalonde)
-print(foo1)
-summary(foo1)
-summary(foo1,verbose=T)
-row.names(foo1$data)[foo1$psclass==1][1:20]
-foo1$data[foo1$psclass==2,c("black", "hispan")][1:20,]
-silent <- readline("\nPress <return> to continue: ")
-
-#Propensity score matching
-foo2 <- matchit(treat ~ re74 + re75, data=lalonde)
-print(foo2)
-summary(foo2)
-summary(foo2, verbose=T)
-plot(foo2)
-
-# Greedy vs. optimal matching
-if ("optmatch"%in%.packages(all=T)) {
-foo <- matchit(treat ~ re74+re75+age+educ, data=lalonde)
-print(summary(foo))
-foo <- matchit(treat ~ re74+re75+age+educ, data=lalonde, m.order=1)
-print(summary(foo))
-}
-
-#Propensity score with exact restriction
-foo3 <- matchit(treat ~ re74 + re75, exact=c("black","hispan"), data=lalonde)
-print(foo3)
-sum.foo3 <- summary(foo3)
-sum.foo3$sum.all[c("hispan","black"),]
-sum.foo3$sum.matched[c("hispan","black"),]
-dta.matched <- match.data(foo3)
-dta.treated <- match.data(foo3, group = "treat")
-dta.control <- match.data(foo3, group = "control")
-
-#Replication of Dehejia & Wahba
-foo <- matchit(treat ~ age + I(age^2) + educ + I(educ^2) + black +
- hispan + married + nodegree + re74 + I(re74^2) +
- re75 + I(re75^2) + I(as.numeric(re74==0)) +
- I(as.numeric(re75==0)), data=lalonde, replace=TRUE,
- discard=1)
-
-#Non-matched and discarded units
-foo$in.sample[!foo$in.sample]
-foo$psweights[foo$in.sample & foo$psweights==0]
-sdisc <- apply(lalonde[!foo$in.sample,],2,mean) #taking column-wise means
-streat <- apply(lalonde[lalonde$treat==1,],2,mean)
-round(rbind(sdisc,streat),2) #rounding
-
-#Look at weights
-print(foo$psweights[foo$data$treat==1][1:10])
-print(foo$psweights[foo$data$treat==0][1:20])
-print((foo$psweights[foo$data$treat==0]*sum(foo$psweights[foo$data$treat==1])/sum(foo$psweights[foo$data$treat==0]))[1:20])
-
-#Subclassification
-foo1 <- matchit(treat ~ age + educ + black + hispan + married +
- nodegree + re74 + re75, data=lalonde, replace=TRUE,
- subclass=6)
-foo2 <- matchit(treat ~ age + educ + black + hispan + married +
- nodegree + re74 + re75, data=lalonde, nearest=FALSE,
- replace=TRUE, subclass=6)
-print(foo2)
-plot(foo2)
-
-#Exact matching on all covariates
-foo <- matchit(treat ~ age + educ + black + hispan + married +
- nodegree + re74 + re75, data=lalonde, exact=TRUE)
-summary(foo)
-
-# Full matching
-if ("optmatch"%in%.packages(all=T)) {
-foo1 <- matchit(treat ~ age + educ + black + hispan + married +
- nodegree + re74 + re75, data=lalonde, full=T)
-print(summary(foo1))
-}
-
-# Caliper matching
-foo <- matchit(treat ~ age + educ + black + hispan + married +
- nodegree + re74 + re75, data=lalonde, caliper=0.25,
- replace=TRUE)
-
-#Mahalanobis matching
-foo <- matchit(treat ~ age + educ, data=lalonde, mahvars = c("age", "educ"),
- caliper=Inf, replace=TRUE)
-
-#Assignment Model specification
-foo1 <- matchit(treat~educ+re74+re75,data=lalonde,model="probit")
-foo2 <- matchit(treat~educ+re74+re75,data=lalonde,model="nnet")
-foo3 <- matchit(treat~educ+re74+re75,data=lalonde,model="GAM")
-foo4 <- matchit(treat~educ+re74+re75,data=lalonde,model="cart")
-
-#Using Observation Names
-test <- lalonde[1:4,] #taking a lalonde subset
-row.names(test) <- c("Dan","Kosuke","Liz","Gary") #assigning row names
-print(test)
-
-#Matching on One Covariate
-index <- rep(TRUE,nrow(lalonde)) #keeping all units
-names(index) <- row.names(lalonde) #assigning obs names
-treat <- lalonde$treat
-names(treat) <- row.names(lalonde)
-re75 <- lalonde$re75
-names(re75) <- row.names(lalonde)
-foo <- matchdef(treat~re75,index,pscore=re75,replace=TRUE,data=lalonde)
-cbind(lalonde[row.names(foo$match.matrix),]$re75,
- lalonde[as.vector(foo$match.matrix[,1]),]$re75) #matched re75
-plot(lalonde[row.names(foo$match.matrix),]$re75,
- lalonde[as.vector(foo$match.matrix[,1]),]$re75, pch=16,
- xlab="Treated", ylab="Controls") #plotting matched data
diff --git a/demo/match.data.R b/demo/match.data.R
new file mode 100644
index 0000000..9fdc12c
--- /dev/null
+++ b/demo/match.data.R
@@ -0,0 +1,46 @@
+###
+### An Example Script for Obtaining Mathced Data
+###
+
+## load the Lalonde data
+data(lalonde)
+user.prompt()
+
+## perform nearest neighbor matching
+m.out1 <- matchit(treat ~ re74 + re75 + age + educ, data = lalonde,
+ method = "nearest", distance = "logit")
+user.prompt()
+
+## obtain matched data
+m.data1 <- match.data(m.out1)
+user.prompt()
+
+## summarize the resulting matched data
+summary(m.data1)
+user.prompt()
+
+## obtain matched data for the treatment group
+m.data2 <- match.data(m.out1, group = "treat")
+user.prompt()
+
+summary(m.data2)
+user.prompt()
+
+## obtain matched data for the control group
+m.data3 <- match.data(m.out1, group = "control")
+user.prompt()
+
+summary(m.data3)
+user.prompt()
+
+## run a subclassification method
+m.out2 <- matchit(treat ~ re74 + re75 + age + educ, data=lalonde, method = "subclass")
+user.prompt()
+
+## specify different names
+m.data4 <- match.data(m.out2, subclass = "block", weights = "w",
+ distance = "pscore")
+user.prompt()
+
+## print the variable names of the matched data
+names(m.data4)
diff --git a/demo/nearest.R b/demo/nearest.R
new file mode 100644
index 0000000..1368aeb
--- /dev/null
+++ b/demo/nearest.R
@@ -0,0 +1,101 @@
+###
+### An Example Script for Nearest Neighbor Matching
+###
+data(lalonde)
+user.prompt()
+
+## 1:1 Nearest neighbor matching
+m.out <- matchit(treat ~ re74 + re75 + educ + black + hispan + age,
+ data = lalonde, method = "nearest")
+user.prompt()
+
+## print a short summary
+print(m.out)
+user.prompt()
+
+## balance diagnostics through statistics
+summary(m.out)
+user.prompt()
+
+## balance diagnostics through graphics
+plot(m.out)
+
+## 2:1 Nearest neighbor matching
+m.out1 <- matchit(treat ~ re74+re75+age+educ, data=lalonde,
+ method = "nearest", distance = "logit", ratio=2)
+user.prompt()
+
+## print a short summary
+print(m.out1)
+user.prompt()
+
+## balance diagnostics through statistics
+summary(m.out1)
+user.prompt()
+
+## balance diagnostics through graphics
+plot(m.out)
+
+## 1:1 Nearest neighbor matching with Mahalanobis matching on re74 and re75 and exact matching on married
+m.out2 <- matchit(treat ~ re74+re75+age+educ, data=lalonde,
+ method = "nearest", distance = "logit", mahvars=c("re74", "re75"), exact=c("married"), caliper=.25)
+user.prompt()
+
+## print a short summary
+print(m.out2)
+user.prompt()
+
+## balance diagnostics through statistics
+summary(m.out2)
+user.prompt()
+
+## balance diagnostics through graphics
+plot(m.out2)
+
+## 1:1 Nearest neighbor matching with units outside the common support discarded
+m.out3 <- matchit(treat ~ re74+re75+age+educ, data=lalonde,
+ method = "nearest", distance = "logit", discard= "both")
+user.prompt()
+
+## print a short summary
+print(m.out3)
+user.prompt()
+
+## balance diagnostics through statistics
+summary(m.out3)
+user.prompt()
+
+## balance diagnostics through graphics
+plot(m.out3)
+
+## 2:1 Nearest neighbor matching with replacement
+m.out4 <- matchit(treat ~ re74+re75+age+educ, data=lalonde,
+ method = "nearest", distance = "logit", replace=TRUE, ratio=2)
+user.prompt()
+
+## print a short summary
+print(m.out4)
+user.prompt()
+
+## balance diagnostics through statistics
+summary(m.out4)
+user.prompt()
+
+## balance diagnostics through graphics
+plot(m.out4)
+
+## 1:1 Nearest neighbor matching followed by subclassification
+m.out5 <- matchit(treat ~ re74+re75+age+educ, data=lalonde,
+ method = "nearest", distance = "logit", subclass=5)
+user.prompt()
+
+## print a short summary
+print(m.out5)
+user.prompt()
+
+## balance diagnostics through statistics
+summary(m.out5)
+user.prompt()
+
+## balance diagnostics through graphics
+plot(m.out5)
diff --git a/demo/optimal.R b/demo/optimal.R
new file mode 100644
index 0000000..6d5eadc
--- /dev/null
+++ b/demo/optimal.R
@@ -0,0 +1,23 @@
+###
+### An Example Script for Optimal Matching
+###
+
+## load the Lalonde data
+data(lalonde)
+user.prompt()
+
+## optimal ratio matching using the propensity score based on logistic regression
+m.out <- matchit(treat ~ re74 + re75 + age + educ, data = lalonde,
+ method = "optimal", distance = "logit", ratio = 2)
+user.prompt()
+
+## a short summary
+print(m.out)
+user.prompt()
+
+## balance diagnostics through statistics
+summary(m.out)
+user.prompt()
+
+## balance diagnostics through graphics
+plot(m.out)
diff --git a/demo/subclass.R b/demo/subclass.R
new file mode 100644
index 0000000..c968a9c
--- /dev/null
+++ b/demo/subclass.R
@@ -0,0 +1,24 @@
+###
+### An Example Script for Subclassification
+###
+
+## load the Lalonde data
+data(lalonde)
+user.prompt()
+
+## sublclassification
+m.out <- matchit(treat ~ re74 + re75 + educ + black + hispan + age,
+ data = lalonde, method = "subclass")
+user.prompt()
+
+## a short summary
+print(m.out)
+user.prompt()
+
+## balance diagnostics
+summary(m.out)
+user.prompt()
+
+## balance diagnostics through plots
+plot(m.out)
+plot(m.out, type="jitter")
diff --git a/man/diagnose.Rd b/man/diagnose.Rd
deleted file mode 100644
index 009bc9a..0000000
--- a/man/diagnose.Rd
+++ /dev/null
@@ -1,85 +0,0 @@
-\name{diagnose}
-
-\alias{diagnose}
-
-\title{Diagnostics for matching procedure}
-
-\description{\code{diagnose} is a sub-function of \code{matchit} which calculates summary
-statistics for the matching, including the weight for each unit.}
-
-
-\details{This is a sub-function of \code{matchit} which calculates summary statistics for the
-matching procedure in \code{matchit}, calculating t-statistics for covariates as well as
-weights for each unit. This function is called directly by \code{matchit} and does not
-generally need to be called directly by users; these details are included for advanced users. }
-
-\usage{ diagnose <- diagnose(formula, match.matrix, pscore, in.sample, data, exact=FALSE,
- mahvars=NULL, subclass=0, psclass=NULL, nearest=TRUE, q.cut=NULL, counter=TRUE)
-}
-
-
-\arguments{
- \item{formula}{(required). Takes the form of \code{T ~ X1 + X2}, where \code{T} is a binary
-treatment indicator and \code{X1} and \code{X2} are the pre-treatment covariates, and \code{T},
-\code{X1}, and \code{X2} are contained in the same data frame. The \code{+} symbol means
-"inclusion" not "addition." You may also include interaction terms in the form if
-\code{I(X1*X2)} or squared terms in the form of \code{I(X1^2)}.}
-
- \item{match.matrix}{(required). n1 by ratio data frame where the rows correspond to treated
-units and the columns store the names of the control units matched to each treated unit. NA
-indicates that treated unit was not matched. Generally created in \code{nearest}.}
-
- \item{pscore}{(required). Vector of estimated propensity scores. Generally calculated in
-\code{distance}.}
-
- \item{in.sample}{(required). Vector of length n showing whether each unit was eligible for
-matching due to common support restrictions with \code{discard}. Generally calculated in
-\code{distance}.}
-
- \item{data}{(required). Data frame containing the variables called in the \code{formula}.
-The dataframe should not include variables with the names \code{psclass}, \code{psweights}, or
-\code{pscore}, as these are expressly reserved in the output dataframe for MatchIt.}
-
- \item{exact}{"FALSE" (default)=no exact matching. "TRUE"=exact matching on all
-variables in \code{formula}. A vector of variable names (that are in \code{data} to indicate
-separate variables on which to exact match, in combination with matching on the propensity
-score.}
-
- \item{mahvars}{Variables on which to perform Mahalanobis matching within each caliper
-(default=NULL). Should be entered as a vector of names of variables in \code{data}.}
-
- \item{subclass}{Either a scaler specifying the number of subclasses (default=0) or a
-vector of probabilities to create quantiles based on \code{sub.by}.}
-
- \item{psclass}{Subclass index in an ordinal scale from 1 to the number of subclasses.
-Unmatched units have subclass=0. Generally computed in \code{subclassify}.}
-
- \item{nearest}{Whether to perform nearest-neighbor matching (default=TRUE). }
- \item{q.cut}{Subclass cut points. Generally calculated in \code{subclassify}.}
-
- \item{counter}{Whether to display counter indicating the progress of the matching
-(default=TRUE).} }
-
-\value{
- \item{psweights}{Vector of length n giving the weight assigned to each unit in the matching
-process. Each weight is proportional to the number of times that unit was matched.} }
-
-\seealso{Please use \code{help.matchit} to access the matchit reference
- manual. The complete document is available online at
- \url{http://gking.harvard.edu/matchit}.
-}
-
-\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
-
-\keyword{methods}
-
-
-
-
-
-
-
-
diff --git a/man/distance.Rd b/man/distance.Rd
deleted file mode 100644
index 04df0e3..0000000
--- a/man/distance.Rd
+++ /dev/null
@@ -1,77 +0,0 @@
-\name{distance}
-
-\alias{distance}
-
-\title{Distance function: estimating propensity scores}
-
-\description{ The distance function calculates the distance measure to be used in the matching,
-usually the propensity score. It is a sub-function of \code{matchit}.}
-
-\details{This is a sub-function of the \code{matchit} command, which calculates the distance
-measure used in the matching, usually the propensity score. This function is called directly by
-\code{matchit} and does not generally need to be called directly by users; these details are
-included for advanced users. }
-
-
-\usage{
-distance <- distance(formula, model="logit", data, discard=0, reestimate=FALSE, counter=TRUE, ...)
-}
-
-
-\arguments{
- \item{formula}{(required). Takes the form of \code{T ~ X1 + X2}, where \code{T} is a binary
-treatment indicator and \code{X1} and \code{X2} are the pre-treatment covariates, and \code{T},
-\code{X1}, and \code{X2} are contained in the same data frame. The \code{+} symbol means
-"inclusion" not "addition." You may also include interaction terms in the form if
-\code{I(X1*X2)} or squared terms in the form of \code{I(X1^2)}.}
-
- \item{data}{(required). Data frame containing the variables called in the \code{formula}.
-The dataframe should not include variables with the names \code{psclass}, \code{psweights}, or
-\code{pscore}, as these are expressly reserved in the output dataframe for MatchIt.}
-
- \item{model}{Method used to estimate the propensity score. May be "logit" (default),
-"probit", "nnet", "GAM", or "cart".}
-
- \item{discard}{Whether to discard units that fall outside some measure of support of the
-distance score. 0 (default)=keep all units. 1=keep all units with common support. 2=discard
-only control units outside the support of the distance measure of the treated units. 3=discard
-only treated units outside the support of the distance measure of the control units.}
-
- \item{reestimate}{Specifies whether to reestimate the propensity score model after
-discarding units (default=FALSE).}
-
- \item{counter}{Whether to display counter indicating the progress of the matching
-(default=TRUE).}
-
- \item{...}{Additional arguments to be passed to \code{distance}, depending on the model
-to be used.} }
-
-\value{
- \item{in.sample}{Vector of length n showing whether each unit was eligible for matching due to
-common support restrictions with \code{discard}.}
- \item{pscore}{Vector of estimated propensity scores.}
- \item{treat}{The treatment indicator from \code{data}.}
- \item{covariates}{Covariates used in the right-hand side of the assignment model.}
- \item{assign.model}{Output of the assignment model.}
-}
-
-\seealso{Please use \code{help.matchit} to access the matchit reference
- manual. The complete document is available online at
- \url{http://gking.harvard.edu/matchit}.
-}
-
-
-\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
-
-\keyword{internal}
-
-
-
-
-
-
-
-
diff --git a/man/exactmatch.Rd b/man/exactmatch.Rd
deleted file mode 100644
index 60b978f..0000000
--- a/man/exactmatch.Rd
+++ /dev/null
@@ -1,59 +0,0 @@
-\name{exactmatch}
-
-\alias{exactmatch}
-
-\title{Exact matching}
-
-\description{\code{exactmatch} is a sub-function of \code{matchit} that does exact matching
-on the set of covariates.}
-
-
-\details{This sub-function is called automatically by the \code{matchit} command when
-\code{exact=TRUE} is chosen, meaning exact matching on all covariates in the assignment model.
-It creates subclasses defined by the values of the covariates, where each subclass must contain
-some treated and some control units. Units with no exact matches in the other treatment group
-are discarded. This function is called directly by \code{matchit} and does not generally need
-to be called directly by users; these details are included for advanced users. }
-
-\usage{
-exactmatch <- exactmatch(formula, data, counter=TRUE)
-}
-
-
-\arguments{
- \item{formula}{(required). Takes the form of \code{T ~ X1 + X2}, where \code{T} is a binary
-treatment indicator and \code{X1} and \code{X2} are the pre-treatment covariates, and \code{T},
-\code{X1}, and \code{X2} are contained in the same data frame. Exact matching is done on the
-variables on the right-hand side of \code{formula}.}
-
- \item{data}{(required). Data frame containing the variables called in the \code{formula}.
-The dataframe should not include variables with the names \code{psclass}, \code{psweights}, or
-\code{pscore}, as these are expressly reserved in the output dataframe for MatchIt.}
-
- \item{counter}{Whether to display counter indicating the progress of the matching
-(default=TRUE).} }
-
-\value{
- \item{psclass}{Subclass index in an ordinal scale from 1 to the number of subclasses.
-Unmatched units have subclass=0. With exact matching, these subclasses are defined by the
-categories of the variables in \code{formula}.} }
-
-\seealso{Please use \code{help.matchit} to access the matchit reference
- manual. The complete document is available online at
- \url{http://gking.harvard.edu/matchit}.
-}
-
-\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
-
-\keyword{methods}
-
-
-
-
-
-
-
-
diff --git a/man/help.matchit.Rd b/man/help.matchit.Rd
index fc9a481..c77c176 100644
--- a/man/help.matchit.Rd
+++ b/man/help.matchit.Rd
@@ -17,8 +17,10 @@ help.matchit(object)
\arguments{
\item{object}{a character string representing a Matchit command or model.
\code{help.matchit("command")} will take you to an index of Matchit
- commands and \code{help.matchit("model")} will take you to a list of
- models. }
+ commands and \code{help.matchit("method")} will take you to a list of
+ matching methods. The following inputs are currently available:
+ \code{exact}, \code{nearest}, \code{subclass}, \code{full},
+ \code{optimal}. }
}
\seealso{The complete document is available online at
diff --git a/man/incomedata.Rd b/man/incomedata.Rd
deleted file mode 100644
index f0b288d..0000000
--- a/man/incomedata.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-\name{incomedata}
-\docType{data}
-\alias{incomedata}
-\title{Simulated data to illustrate matching procedure for Matchit}
-
-\description{
- This data set is a fake data set to be used to illustrate matching methods in the
-Matchit library. It has data on 200 individuals, 100 treated and 100 control, with treatment
-assignemt, four covariates (income in 1974, income in 1975, male, and education), and one
-outcome variable (income in 1978) observed. }
-
-\usage{data(incomedata)}
-
-\format{
- A data frame with 200 observations (rows) and six variables (columns). The first
-column, "treat", is the treatment assignment (1=treatment, 0=control). The second column,
-"income74", is income in 1974 (in U.S. dollars). The third column, "income75", is income in
-1975, also in U.S. dollars. The fourth column, "male", is an indicator for male (1=male,
-0=female). The fifth column, "educ", is the education level in years of schooling. The sixth
-column is the outcome variable, income in 1978 ("income78"). }
-
-\source{Simulated data.}
-\keyword{datasets}
diff --git a/man/match.data.Rd b/man/match.data.Rd
index a7281c7..31785b3 100644
--- a/man/match.data.Rd
+++ b/man/match.data.Rd
@@ -2,27 +2,53 @@
\alias{match.data}
-\title{Output matched data sets}
+\title{Output Matched Data Sets}
-\description{The code \code{match.data} creates output data sets from a \code{matchit}
-matching algorithm.}
+\description{\code{match.data} outputs matched data sets from
+\code{matchit()}.}
\usage{
-match.data <- match.data(object, group="all")
+match.data <- match.data(object, group="all", distance = "distance",
+weights = "weights", subclass = "subclass")
}
-\arguments{
-\item{object}{(required). Stored output from \code{matchit}.}
+\arguments{
-\item{group}{Which units to output. "all" (default) gives all matched units (treated and
-control), "treat" gives just the matched treated units, and "control" gives just the matched
-control units.} }
+ \item{object}{The output object from {\tt matchit()}. This is
+ an required input.
+ }
+ \item{group}{This argument specifies for which matched group the user
+ wants to extract the data. Available options are \code{"all"} (all
+ matched units), \code{"treat"} (matched units in the treatment
+ group), and \code{"control"} (matched units in the control
+ group). The default is \code{"all"}.
+ }
+ \item{distance}{This argument specifies the variable name used to
+ store the distance measure. The default is \code{"distance"}.
+ }
+ \item{weights}{This argument specifies the variable name used to store
+ the resulting weights from matching. The default is
+ \code{"weights"}.
+ }
+ \item{subclass}{This argument specifies the variable name used to store the
+ subclass indicator. The default is \code{"subclass"}.
+ }
+}
+
+\value{
-\value{
-\item{Returns a subset of the original data set sent to \code{matchit}, with just the matched units. The data set also contains the additional
-variables \code{psclass}, \code{pscore}, and \code{psweights}. The variable \code{psclass} gives the subclass index for each unit (if applicable).
-The variable \code{pscore} gives the propensity scores, and \code{psweights} gives the weights for each unit, generated in the matching procedure.
-See the \code{matchit} documentation for more details.}
+ \item{Returns a subset of the original data set sent to
+ \code{matchit()}, with just the matched units. The data set also
+ contains the additional variables \code{distance}, \code{weights},
+ and \code{subclass}. The variable \code{distance} gives
+ the estimated distance measure, and \code{weights} gives the
+ weights for each unit, generated in the matching procedure.
+ The variable \code{subclass} gives the subclass
+ index for each unit (if applicable).
+ See the \url{http://gking.harvard.edu/matchit/} for the complete
+ documentation and type \code{demo(match.data)} at the R prompt to
+ see a demonstration of the code.
+ }
}
\seealso{Please use \code{help.matchit} to access the matchit reference
@@ -30,7 +56,8 @@ See the \code{matchit} documentation for more details.}
\url{http://gking.harvard.edu/matchit}.}
\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
+ Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai
+ <\email{kimai at princeton.edu}>; Gary King
<\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
}
diff --git a/man/matchdef.Rd b/man/matchdef.Rd
deleted file mode 100644
index 821fa4e..0000000
--- a/man/matchdef.Rd
+++ /dev/null
@@ -1,85 +0,0 @@
-\name{matchdef}
-
-\alias{matchdef}
-
-\title{Nearest neighbor matching algorithm}
-
-\description{ This is a sub-function of \code{matchit} which performs the nearest neighbor
-matching; \code{matchdef} is called automatically by \code{matchit}. }
-
-
-\details{The matching is done using nearest neighbor matching on the propensity score, estimated
-in the sub-function \ref{distance}. This function is called directly by \code{matchit} and
-does not generally need to be called directly by users; these details are included for advanced
-users. }
-
-\usage{
-matchdef <- matchdef(formula, in.sample, pscore, nearest=TRUE, replace=FALSE,
- m.order=2, ratio=1, caliper=0, calclosest=FALSE, mahvars=NULL, exact=FALSE,
- data=NULL, counter=TRUE)
-}
-
-
-\arguments{
- \item{formula}{(required). Takes the form of \code{T ~ X1 + X2}, where \code{T} is a binary
-treatment indicator and \code{X1} and \code{X2} are the pre-treatment covariates, and \code{T},
-\code{X1}, and \code{X2} are contained in the same data frame. The \code{+} symbol means
-"inclusion" not "addition." You may also include interaction terms in the form if
-\code{I(X1*X2)} or squared terms in the form of \code{I(X1^2)}.}
-
- \item{data}{Data frame containing the variables in \code{formula}.}
- \item{in.sample}{Vector of length n showing whether each unit was eligible for matching due to
-common support restrictions with \code{discard}. Computed in \code{distance}.}
-
- \item{pscore}{Vector of propensity scores, calculated in \code{distance}.}
-
- \item{nearest}{Whether to perform nearest-neighbor matching (default=TRUE). }
- \item{replace}{Whether to match with replacement (default=FALSE). }
- \item{m.order}{Order in which to match treated units with control units. 1=optimal (requires ``optmatch" package),
-2 (default)=from high to low, 3=from low to high, 4=random order.}
- \item{ratio}{The number of control units to be matched to each treated unit (default=1).}
-
- \item{caliper}{Standard deviations of the propensity score within which to draw control
-units (default=0).}
-
- \item{calclosest}{If \code{caliper!=0}, whether to take the nearest available match if
-no matches are available within \code{caliper} (default=FALSE).}
-
- \item{mahvars}{Variables on which to perform Mahalanobis matching within each caliper
-(default=NULL). Should be entered as a vector of names of variables in \code{data}.}
-
- \item{exact}{"FALSE" (default)=no exact matching. "TRUE"=exact matching on all
-variables in \code{formula}. A vector of variable names (that are in \code{data}) to indicate
-separate variables on which to exact match, in combination with matching on the propensity
-score.}
-
- \item{counter}{Whether to display counter indicating the progress of the matching
-(default=TRUE).} }
-
-\value{
- \item{match.matrix}{n1 by ratio data frame where the rows correspond to treated units and the
-columns store the names of the control units matched to each treated unit. NA indicates that
-treated unit was not matched.}
-
- \item{in.sample}{Vector of length n showing whether each unit was eligible for matching due to
-common support restrictions with \code{discard}.} }
-
-\seealso{Please use \code{help.matchit} to access the matchit reference
- manual. The complete document is available online at
- \url{http://gking.harvard.edu/matchit}.
-}
-
-\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
-
-\keyword{internal}
-
-
-
-
-
-
-
-
diff --git a/man/matchit.Rd b/man/matchit.Rd
index 69f81fd..dbba8c6 100644
--- a/man/matchit.Rd
+++ b/man/matchit.Rd
@@ -3,123 +3,171 @@
\alias{matchit}
\alias{MatchIt}
\alias{Matchit}
-\alias{i}
-
-\title{Matchit: Matching Software for Causal Inference}
-
-\description{
-\emph{Matchit} enables parametric models for causal inference to work better by selecting
-well-matched subsets of the original treated and control groups. MatchIt implements the
-suggestions of Ho, Imai, King, and Stuart (2004) for improving parametric statistical models by
-preprocessing data with nonparametric matching methods. MatchIt implements a wide range of
-sophisticated matching methods, making it possible to greatly reduce the dependence of causal
-inferences on hard-to-justify, but commonly made, statistical modeling assumptions. The
-software also easily fits into existing research practices since, after preprocessing with
-MatchIt, researchers can use whatever parametric model they would have used without MatchIt, but
-produce inferences with substantially more robustness and less sensitivity to modeling
-assumptions. Matched data sets created by MatchIt can be entered easily in Zelig
-(\url{http://gking.harvard.edu/zelig}) for subsequent parametric analyses. Full documentation is
-available online at \url{http://gking.harvard.edu/matchit}, and help for specific commands is
-available through \code{help.matchit}.}
-
-\details{The matching is done using the \code{matchit(treat ~ X, ...)} command, where
-\code{treat} is the vector of treatment assignments and \code{X} are the covariates to be used
-in the matching. There are a number of matching options, detailed below. The full syntax is
-\code{matchit(formula, data=NULL, discard=0, exact=FALSE, replace=FALSE, ratio=1, model="logit",
-reestimate=FALSE, nearest=TRUE, m.order=2, caliper=0, calclosest=FALSE, mahvars=NULL,
-subclass=0, sub.by="treat", counter=TRUE, full=FALSE, full.options=list(), \dots)} A summary of the results can be seen
-graphically
-using \code{plot(matchitobject)}, or numerically using \code{summary(matchitobject)}.
+
+\title{MatchIt: Matching Software for Causal Inference}
+
+\description{ \code{matchit} is the main command of the package
+\emph{MatchIt}, which enables parametric models for causal inference to
+work better by selecting well-matched subsets of the original treated
+and control groups. MatchIt implements the suggestions of Ho, Imai,
+King, and Stuart (2004) for improving parametric statistical models by
+preprocessing data with nonparametric matching methods. MatchIt
+implements a wide range of sophisticated matching methods, making it
+possible to greatly reduce the dependence of causal inferences on
+hard-to-justify, but commonly made, statistical modeling assumptions.
+The software also easily fits into existing research practices since,
+after preprocessing with MatchIt, researchers can use whatever
+parametric model they would have used without MatchIt, but produce
+inferences with substantially more robustness and less sensitivity to
+modeling assumptions. Matched data sets created by MatchIt can be
+entered easily in Zelig (\url{http://gking.harvard.edu/zelig}) for
+subsequent parametric analyses. Full documentation is available online
+at \url{http://gking.harvard.edu/matchit}, and help for specific
+commands is available through \code{help.matchit}.}
+
+\details{The matching is done using the \code{matchit(treat ~ X, ...)}
+command, where \code{treat} is the vector of treatment assignments and
+\code{X} are the covariates to be used in the matching. There are a
+number of matching options, detailed below. The full syntax is
+\code{matchit(formula, data=NULL, discard=0, exact=FALSE, replace=FALSE,
+ratio=1, model="logit", reestimate=FALSE, nearest=TRUE, m.order=2,
+caliper=0, calclosest=FALSE, mahvars=NULL, subclass=0, sub.by="treat",
+counter=TRUE, full=FALSE, full.options=list(), \dots)} A summary of the
+results can be seen graphically using \code{plot(matchitobject)}, or
+numerically using \code{summary(matchitobject)}.
\code{print(matchitobject)} also prints out the output. }
-\usage{
-matchit <- matchit(formula, data, model="logit", discard=0, reestimate=FALSE, nearest=TRUE,
- replace=FALSE, m.order=2, ratio=1, caliper=0, calclosest=FALSE,
- subclass=0, sub.by="treat", mahvars=NULL, exact=FALSE, counter=TRUE, full=FALSE, full.options=list(),...)
+\usage{matchit(formula, data, method = "nearest", distance = "logit",
+ distance.options = list(), discard = "none",
+ reestimate = FALSE, ...)
}
\arguments{
- \item{formula}{(required). Takes the form of \code{T ~ X1 + X2}, where \code{T} is a binary
-treatment indicator and \code{X1} and \code{X2} are the pre-treatment covariates, and \code{T},
-\code{X1}, and \code{X2} are contained in the same data frame. The \code{+} symbol means
-"inclusion" not "addition." You may also include interaction terms in the form if
-\code{I(X1*X2)} or squared terms in the form of \code{I(X1^2)}.}
-
- \item{data}{(required). Data frame containing the variables called in the \code{formula}.
-The dataframe should not include variables with the names \code{psclass}, \code{psweights}, or
-\code{pscore}, as these are expressly reserved in the output dataframe for MatchIt.}
-
- \item{model}{Method used to estimate the propensity score. May be "logit" (default),
-"probit", "nnet", "GAM", or "cart".}
-
- \item{discard}{Whether to discard units that fall outside some measure of support of the
-distance score. 0 (default)=keep all units. 1=keep all units with common support. 2=discard
-only control units outside the support of the distance measure of the treated units. 3=discard
-only treated units outside the support of the distance measure of the control units.}
-
- \item{reestimate}{Specifies whether to reestimate the propensity score model after
-discarding units (default=FALSE).}
- \item{nearest}{Whether to perform nearest-neighbor matching (default=TRUE). }
- \item{replace}{Whether to match with replacement (default=FALSE). }
- \item{m.order}{Order in which to match treated units with control units. 1=optimal (requires ``optmatch"
-package), 2 (default)=from high to low, 3=from low to high, 4=random order.}
-
- \item{ratio}{The number of control units to be matched to each treated unit (default=1).}
- \item{caliper}{Standard deviations of the propensity score within which to draw control
-units (default=0).}
-
- \item{calclosest}{If \code{caliper!=0}, whether to take the nearest available match if
-no matches are available within \code{caliper} (default=FALSE).}
-
- \item{subclass}{Either a scaler specifying the number of subclasses (default=0) or a
-vector of probabilities to create quantiles based on \code{sub.by}.}
-
- \item{sub.by}{If \code{subclass!=0}, by what criteria to subclassify. "treat" (default)
-=by the number of treated units, "control"=by the number of control units, "all"=by the total
-number of units.}
-
- \item{mahvars}{Variables on which to perform Mahalanobis matching within each caliper
-(default=NULL). Should be entered as a vector of names of variables in \code{data}.}
-
- \item{exact}{"FALSE" (default)=no exact matching. "TRUE"=exact matching on all
-variables in \code{formula}. A vector of variable names (that are in \code{data} to indicate
-separate variables on which to exact match, in combination with matching on the propensity
-score.}
-
- \item{counter}{Whether to display counter indicating the progress of the matching
-(default=TRUE).}
-
- \item{full}{Whether to do full matching (default=FALSE). Requires ``optmatch" package.}
- \item{full.options}{Additional options for full matching.}
-
- \item{...}{Additional arguments to be passed to \code{matchit}, depending on the model
-to be used.}
+ \item{formula}{This argument takes the usual syntax of R formula,
+ \code{treat ~ x1 + x2}, where \code{treat} is a binary treatment
+ indicator and \code{x1} and \code{x2} are the pre-treatment
+ covariates. Both the treatment indicator and pre-treatment covariates
+ must be contained in the same data frame, which is specified as
+ \code{data} (see below). All of the usual R syntax for formula
+ works. For example, \code{x1:x2} represents the first order
+ interaction term between \code{x1} and \code{x2}, and \code{I(x1 \^\
+ 2)} represents the square term of \code{x1}. See \code{help(formula)}
+ for details.
+ }
+ \item{data}{This argument specifies the data frame containing the
+ variables called in \code{formula}.
+ }
+ \item{method}{This argument specifies a matching method. Currently,
+ \code{"exact"} (exact matching), \code{"full"} (full matching),
+ \code{"genetic"} (genetic matching), \code{"nearest"} (nearest
+ neighbor matching), \code{"optimal"} (optimal matching), and
+ \code{"subclass"} (subclassification) are available. The default is
+ \code{"nearest"}. Note that within each of these matching methods,
+ \emph{MatchIt} offers a variety of options. See
+ \url{http://gking.harvard.edu/matchit/docs/Inputs.html} for the
+complete list
+ of supported options.
+ }
+ \item{distance}{This argument specifies the method used to estimate the
+ distance measure. The default is logistic regression,
+ \code{"logit"}. A variety of other methods are available. See
+ \url{http://gking.harvard.edu/matchit/docs/All_Matching_Methods.html}
+for the complete list of
+ supported methods.
+ }
+ \item{distance.options}{ This optional argument specifies the optional
+ arguments that are passed to the model for estimating the distance
+ measure. The input to this argument should be a list.
+ }
+ \item{discard}{This argument specifies whether to discard units that
+ fall outside some measure of support of the distance score before
+ matching, and not allow them to be used at all in the matching
+ procedure. Note that discarding units may change the quantity of
+ interest being estimated.
+
+ \item{none}{(default) discards no units before matching.}
+ \item{both}{discards all units (treated and control) that are
+ outside the support of the distance measure.
+ }
+ \item{control}{discards only control units outside the
+ support of the distance measure of the treated units.
+ }
+ \item{treat}{discards only treated units outside the support
+ of the distance measure of the control units.
+ }
+ }
+ \item{reestimate}{This argument specifies whether the model for
+ distance measure should be re-estimated after units are
+ discarded. The input must be a logical value. The default is
+ \code{FALSE}.
+ }
+ \item{...}{Additional arguments to be passed to a variety of matching
+ methods. See \url{http://gking.harvard.edu/matchit/??} for the
+ complete list of options.
+ }
}
\value{
- \item{call}{The original \code{matchit} call.}
- \item{formula}{Formula used to specify the propensity score.}
- \item{match.matrix}{n1 by ratio data frame where the rows correspond to treated units and the
-columns store the names of the control units matched to each treated unit. NA indicates that
-treated unit was not matched.}
-
- \item{in.sample}{Vector of length n showing whether each unit was eligible for matching due to
-common support restrictions with \code{discard}.}
-
- \item{matched}{Vector of length n showing whether each unit was matched.}
-
- \item{psweights}{Vector of length n giving the weight assigned to each unit in the matching
-process. Each weight is proportional to the number of times that unit was matched.}
-
- \item{psclass}{Subclass index in an ordinal scale from 1 to the number of subclasses.
-Unmatched units have subclass=0.}
- \item{q.cut}{Subclass cut points.}
- \item{assign.model}{Output of the assignment model.}
- \item{data}{The original data set, with \code{psclass}, \code{psweights}, and \code{pscore}
-(propensity scores) added.}
- \item{treat}{The treatment indicator from \code{data}.}
- \item{covariates}{Covariates used in the right-hand side of the assignment model.}
+ \item{call}{The original \code{matchit} call.
+ }
+ \item{formula}{The formula used to specify the model for
+ estimating the distance measure.
+ }
+ \item{model}{The output of the model used to estimate
+ the distance measure. \code{summary(m.out$model)} will give the
+ summary of the model where \code{m.out} is the output object from
+ \code{matchit}.
+ }
+ \item{match.matrix}{An \eqn{n_1} by \code{ratio} matrix
+ where\cr\cr
+ the row names, which can be obtained through
+ \code{row.names(match.matrix)}, represent the names of the
+ treatment units, which come from the data frame specified in
+ \code{data}.\cr\cr
+
+ each column stores the name(s) of the control unit(s) matched
+ to the treatment unit of that row. For example, when the
+ \code{ratio} input for nearest neighbor or optimal matching is
+ specified as 3, the three columns of
+ \code{match.matrix} represent the three control units matched to
+ one treatment unit).\cr\cr
+
+ \code{NA} indicates that the treatment unit was not matched.
+ }
+ \item{discarded}{A vector of length $n$ that displays
+ whether the units were ineligible for matching due to common
+ support restrictions. It equals \code{TRUE} if unit \eqn{i} was
+ discarded, and it is set to \code{FALSE} otherwise.
+ }
+ \item{distance}{A vector of length \eqn{n} with the estimated
+ distance measure for each unit.
+ }
+ \item{weights}{A vector of length \eqn{n} that provides the
+ weights assigned to each unit in the matching process. Unmatched
+ units have weights equal to \code{0}. Matched treated units have
+ weight \code{1}. Each matched control unit has weight proportional
+ to the number of treatment units to which it was matched, and the sum of
+ the control weights is equal to the number of uniquely matched
+ control units. See
+\url{http://gking.harvard.edu/matchit/docs/How_Exactly_are.html} for
+more
+ details.
+ }
+ \item{subclass}{The subclass index in an ordinal
+ scale from 1 to the total number of subclasses as specified in
+ \code{subclass} (or the total number of subclasses from full or
+ exact matching). Unmatched units have \code{NA}.
+ }
+ \item{q.cut}{The subclass cut-points that classify the
+ distance measure.
+ }
+ \item{treat}{The treatment indicator from
+ \code{data} (the left-hand side of \code{formula}).
+ }
+ \item{X}{The covariates used for estimating the
+ distance measure (the right-hand side of \code{formula}).
+ }
}
\seealso{Please use \code{help.matchit} to access the matchit reference
@@ -127,13 +175,15 @@ Unmatched units have subclass=0.}
\url{http://gking.harvard.edu/matchit}.
}
-\references{Daniel Ho, Kosuke Imai, Gary King, and Elizabeth Stuart (2004)
-}
+\references{Daniel Ho, Kosuke Imai, Gary King, and Elizabeth Stuart
+ (2004) `Matching as Nonparametric Preprocessing for Improving Parametric
+ Causal Inference,'' preprint available at
+ \url{http://gking.harvard.edu/files/abs/matchp-abs.shtml} }
\author{
Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
+ <\email{king at harvard.edu}>; Elizabeth
+ Stuart<\email{stuart at stat.harvard.edu}> }
\keyword{environment}
diff --git a/man/neyman.Rd b/man/neyman.Rd
deleted file mode 100644
index 6846ff5..0000000
--- a/man/neyman.Rd
+++ /dev/null
@@ -1,47 +0,0 @@
-\name{neyman}
-
-\alias{neyman}
-\alias{print.neyman}
-
-\title{Estimating treatment effects: Neyman's method}
-
-\description{Calculates basic estimate of treatment effect, using simple difference in means and
-Neyman's estimate of the variance.}
-
-\details{Calculates overall treatment effect estimate using matched samples generated by
-\code{matchit}. If subclasses were generated in the matching procedure, the estimate is a
-weighted average over the subclasses, with subclass weights defined by the \code{sub.by} option
-in the \code{matchit} call. The standard deviation can also be estimated using the bootstrap
-procedure, which is often used when matching done with replacement. For more details on the
-calculations, see the complete documentation (link below). The \code{summary} command on a
-\code{neyman} object will provide a test of significance of the outcome as well as sample sizes
-of the matched treated and control groups, and subclass estimates if applicable. }
-
-\usage{
-neyman <- neyman(Y, object, bootstrap=NULL, counter=TRUE)
-}
-
-\arguments{
-\item{Y}{(required). Outcome variable of interest, included in \code{data} from
-\code{matchit} object.}
-\item{object}{(required). Stored output from \code{matchit}.}
-\item{bootstrap}{Use the bootstrap to estimate standard errors (default=FALSE). Often used when
-matching done with replacement.}
-\item{counter}{Counter indicating progress of the bootstrap
-(default=TRUE, which shows the counter).}
-}
-
-\value{
-\item{Returns estimate of the effect of the treatment, as well as the standard deviation
-of that estimate.} }
-
-\seealso{Please use \code{help.matchit} to access the matchit reference
- manual. The complete document is available online at
- \url{http://gking.harvard.edu/matchit}.}
-
-\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
-
-\keyword{htest}
diff --git a/man/plot.matchit.Rd b/man/plot.matchit.Rd
deleted file mode 100644
index 27ff439..0000000
--- a/man/plot.matchit.Rd
+++ /dev/null
@@ -1,39 +0,0 @@
-\name{plot.matchit}
-
-\alias{plot.matchit}
-
-\title{Graphing Quantities of Interest}
-
-\description{ The \code{matchit} method for the generic \code{plot}
- command generates default plots.}
-
-\usage{
-\method{plot}{matchit}(x, ...)
-}
-
-\arguments{
-\item{x}{stored output from \code{\link{matchit}}. }
-\item{\dots}{Additional parameters passed to \code{plot.default}.
- Because \code{plot.matchit} primarily produces diagnostic plots, many
- of these parameters are hard-coded for convenience and
- presentation. }
-}
-
-\value{
-\item{\code{plot.matchit} allows the user to check the distribution of all covariates
-in the assignment model, squares, and interactions, within all subclasses. The graphs present
-density estimate graphs of the propensity score of treated and control units in the full and
-matched samples, jitter plots of the propensity score for treated and control units, density
-estimate graphs of any covariates, and density estimate graphs of any covariates by subclass.}}
-
-\seealso{Please use \code{help.matchit} to access the matchit reference
- manual. The complete document is available online at
- \url{http://gking.harvard.edu/matchit}.
-}
-
-\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
-
-\keyword{hplot}
diff --git a/man/print.matchit.Rd b/man/print.matchit.Rd
deleted file mode 100644
index 506afc8..0000000
--- a/man/print.matchit.Rd
+++ /dev/null
@@ -1,36 +0,0 @@
-\name{print.matchit}
-
-\alias{print.matchit}
-
-\title{Printing Quantities of Interest}
-
-\description{ The \code{matchit} method for the generic \code{print}
- command generates default output.
-}
-
-\usage{
-\method{print}{matchit}(x, digits = max(3, getOption("digits") - 3), ...)
-}
-
-\arguments{
-\item{x}{stored output from \code{matchit}. }
-\item{digits}{Optionally specify the significant digits to be displayed.}
-\item{\dots}{Additional parameters passed to \code{print.default}.}
-}
-
-\value{
-\item{Returns the original assignment model call, summary statistics of the propensity
-score for the full and matched samples, summary statistics of the propensity score for each
-subclass (if applicable), and sample sizes for the full and matched samples (full and by
-subclass).} }
-
-\seealso{Please use \code{help.matchit} to access the matchit reference
- manual. The complete document is available online at
- \url{http://gking.harvard.edu/matchit}.}
-
-\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
-
-\keyword{methods}
diff --git a/man/subclassify.Rd b/man/subclassify.Rd
deleted file mode 100644
index 99c2e21..0000000
--- a/man/subclassify.Rd
+++ /dev/null
@@ -1,85 +0,0 @@
-\name{subclassify}
-
-\alias{subclassify}
-
-\title{Subclassify units}
-
-\description{This is a sub-function of \code{matchit} which generates subclasses of units
-given a propensity score.}
-
-\details{This function creates subclasses of units based on their covariate values, usually
-defined by the propensity score. This function is called directly by \code{matchit} and does
-not generally need to be called directly by users; these details are included for advanced
-users. }
-
-\usage{
-subclassify <- subclassify(formula, data, in.sample, pscore, nearest=TRUE, match.matrix,
- subclass=0, sub.by="treat", counter=TRUE, full=FALSE, full.options=list())
-}
-
-
-\arguments{
- \item{formula}{(required). Takes the form of \code{T ~ X1 + X2}, where \code{T} is a binary
-treatment indicator and \code{X1} and \code{X2} are the pre-treatment covariates, and \code{T},
-\code{X1}, and \code{X2} are contained in the same data frame. The \code{+} symbol means
-"inclusion" not "addition." You may also include interaction terms in the form if
-\code{I(X1*X2)} or squared terms in the form of \code{I(X1^2)}.}
-
- \item{data}{(required). Data frame containing the variables called in the \code{formula}.
-The dataframe should not include variables with the names \code{psclass}, \code{psweights}, or
-\code{pscore}, as these are expressly reserved in the output dataframe for MatchIt.}
-
- \item{in.sample}{(required). Vector of length n showing whether each unit was eligible for
-matching due to common support restrictions with \code{discard}. Generally created by
-\code{distance}.}
-
- \item{pscore}{(required). Vector of estimated propensity scores. Generally created by
-\code{distance}.}
-
- \item{nearest}{Whether to perform nearest-neighbor matching (default=TRUE). }
-
- \item{match.matrix}{(required) n1 by ratio data frame where the rows correspond to treated
-units and the columns store the names of the control units matched to each treated unit. NA
-indicates that treated unit was not matched. Generally created by \code{matchdef}.}
-
- \item{subclass}{Either a scaler specifying the number of subclasses (default=0) or a
-vector of probabilities to create quantiles based on \code{sub.by}.}
-
- \item{sub.by}{If \code{subclass!=0}, by what criteria to subclassify. "treat" (default)
-=by the number of treated units, "control"=by the number of control units, "all"=by the total
-number of units.}
-
- \item{counter}{Whether to display counter indicating the progress of the matching
-(default=TRUE).}
-
- \item{full}{Whether to do full matching (default=FALSE). Requires ``optmatch" package.}
- \item{full.options}{Additional options for full matching.}
-
-
-}
-
-\value{
- \item{psclass}{Subclass index in an ordinal scale from 1 to the number of subclasses.
-Unmatched units have subclass=0.}
- \item{q.cut}{Subclass cut points.}
-}
-
-\seealso{Please use \code{help.matchit} to access the matchit reference
- manual. The complete document is available online at
- \url{http://gking.harvard.edu/matchit}.
-}
-
-\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
-
-\keyword{internal}
-
-
-
-
-
-
-
-
diff --git a/man/summary.matchit.Rd b/man/summary.matchit.Rd
deleted file mode 100644
index adae6df..0000000
--- a/man/summary.matchit.Rd
+++ /dev/null
@@ -1,65 +0,0 @@
-\name{summary.matchit}
-
-\alias{summary.matchit}
-\alias{print.summary.matchit}
-
-\title{Summarizing Quantities of Interest}
-
-\description{ The \code{matchit} method for the generic \code{summary}
- command generates default output.
-}
-
-\usage{
-\method{summary}{matchit}(object, verbose=F, sig=2, ...)
-}
-
-\arguments{
-\item{object}{stored output from \code{matchit}.}
-
-\item{sig}{Optionally specifies the size of the t-statistic for a covariate to be labeled
-"problematic" (default=2).}
-
-\item{verbose}{Option to show summary statistics in \code{sum.all} and \code{sum.matched} for
-all covariates, their squares, and two-way interactions when \code{verbose=TRUE} and only the
-covariates themselves when \code{verbose=FALSE} (default).}
-
-\item{\dots}{Additional parameters
-passed to \code{summary.default}.} }
-
-\value{
-\item{call}{The original assignment model call.}
-
-\item{sum.all}{Data frame containing summary statistics on all observations in the treated and
-control groups (means, standard deviations, t-statistics and bias statistics for all
-covariates).}
-
-\item{sum.matched}{Data frame containing summary statistics on the matched observations in the
-treated and control groups (means, standard deviations, t-statistics, bias statistics, and
-whether bias was reduced by the matching).}
-
-\item{Problematic}{Problematic covariates that remain imbalanced in the matched samples.
-Variables listed when the absolute value of the t-statistic in the matched sample exceeds
-\code{sig}.}
-
-\item{xn}{Sample sizes in the full and matched samples.}
-
-\item{q.table}{Array that contains the same information as \code{sum.all} and \code{sum.matched}
-for each subclass.}
-
-\item{qn}{Sample sizes in the full and matched samples, by subclass.}
-
-\item{match.matrix}{From the \code{matchit} output.}
-
-\item{psclass}{From the \code{matchit} output.}
-\item{in.sample}{From the \code{matchit} output.} }
-
-\seealso{Please use \code{help.matchit} to access the matchit reference
- manual. The complete document is available online at
- \url{http://gking.harvard.edu/matchit}.}
-
-\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
-
-\keyword{methods}
diff --git a/man/summary.neyman.Rd b/man/summary.neyman.Rd
deleted file mode 100644
index 9a9fa62..0000000
--- a/man/summary.neyman.Rd
+++ /dev/null
@@ -1,44 +0,0 @@
-\name{summary.neyman}
-
-\alias{summary.neyman}
-\alias{print.summary.neyman}
-
-\title{Summarizing the estimated treatment effects}
-
-\description{Calculates basic estimate of treatment effect, using simple difference in means and
-Neyman's estimate of the variance. Also provides test of significance and subclass estimates if
-applicable.}
-
-\details{Calculates overall treatment effect estimate using matched samples generated by
-\code{matchit}. Also gives test of significance of the estimated treatment effect, and subclass
-estimates, if applicable. If subclasses were generated in the matching procedure, an overall
-estimate is also provided, which is a weighted average over the subclasses, with subclass
-weights defined by the \code{sub.by} option in the \code{matchit} call. The standard deviation
-can also be estimated using the bootstrap procedure, which is often used when matching done with
-replacement. For more details on the calculations, see the complete documentation (link below).
-}
-
-\usage{
-\method{summary}{neyman}(object, ...)
-}
-
-\arguments{
-\item{object}{(required). Stored output from \code{neyman}. }
-\item{...}{Further arguments passed to or from other methods.}
-}
-
-\value{ \item{Returns estimate of the effect of the treatment, as well as the standard deviation
-of that estimate, a test of significance of the estimate, and subclass estimates if applicable.
-Also shows the sample sizes of the matched treated and control groups used to estimate the
-treatment effect.} }
-
-\seealso{Please use \code{help.matchit} to access the matchit reference
- manual. The complete document is available online at
- \url{http://gking.harvard.edu/matchit}.}
-
-\author{
- Daniel Ho <\email{daniel.ho at yale.edu}>; Kosuke Imai <\email{kimai at princeton.edu}>; Gary King
- <\email{king at harvard.edu}>; Elizabeth Stuart<\email{stuart at stat.harvard.edu}>
-}
-
-\keyword{methods}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/r-cran-matchit.git
More information about the debian-science-commits
mailing list