Multiple Imputation for Regression in R

Arndt Regorz, Dipl. Kfm. & M.Sc. Psychology, 11/20/2022


Here is the R code for the YouTube tutorial about multiple imputation for regression analysis in R.

# install.packages("mice")

library(mice)

# Explore the example dataset nhanes2 (shipped with the mice package)
help("nhanes2")
summary(nhanes2)
str(nhanes2) # important: categorical variables must be stored as factors!
md.pattern(nhanes2)

# Baseline for later comparison: ordinary regression, where cases with
# missing data are excluded listwise
reg.fit <- lm(formula = chl ~ bmi + hyp, data = nhanes2)
summary(reg.fit)

# Multiple Imputation with the mice package
#=========================================

# 1. Imputation

# Create m = 50 imputed datasets with maxit = 10 iterations each; the fixed
# seed makes the imputations reproducible. The argument printFlag is spelled
# out in full (the abbreviation `print` only works via partial argument
# matching) and suppresses the iteration log.
imp.data <- mice(
  data = nhanes2, m = 50, maxit = 10, seed = 12345,
  printFlag = FALSE
)

# Which methods were used?
imp.data

#====================================================

# (Optional: extract the imputed datasets in "long" format and print them,
# in case you want to inspect the imputed values directly:
imp.datasets <- complete(imp.data, action = "long")
imp.datasets
# I usually skip this step)

# (If we wanted to change the imputation method:
# List the available imputation functions (mice.impute.*)
methods(mice)
# One method per column of nhanes2 (age, bmi, hyp, chl), in column order;
# "" means that the column is not imputed
my.method <- c("", "pmm", "logreg", "norm")
imp.data2 <- mice(
  data = nhanes2, m = 50, maxit = 10, seed = 12345,
  method = my.method, printFlag = FALSE
)
# Printing the result is a separate statement; it must not share a line with
# the closing parenthesis of the mice() call
imp.data2
# But we use the default values from above for the rest of the tutorial)

#====================================================

# Convergence check: plot the imputation result object and inspect the plot
# to judge whether convergence was achieved
plot(imp.data)

# Plausibility check: strip plot of the imputation result, used to see
# whether the imputed data has plausible values
stripplot(imp.data)

# (the same plausibility check for the run with the changed imputation
# methods, imp.data2)
stripplot(imp.data2)

# 2. Regression for each imputed dataset

# Fit the same linear model separately within every imputed dataset
reg.fit.mi <- with(data = imp.data, expr = lm(chl ~ bmi + hyp))

reg.fit.mi

# Example: regression result for a single imputation (here the 3rd one)
summary(reg.fit.mi[["analyses"]][[3]])

# 3. Pooling the results

# Combine the per-imputation regression fits into one pooled result
pool.fit <- pool(object = reg.fit.mi)

summary(pool.fit)

# Technical information about the pooled object (see the mipo help page):
pool.fit
help("mipo")

# Pooled R-squared

pool.r2 <- pool.r.squared(object = reg.fit.mi)

pool.r2

# For comparison: the regression with listwise exclusion once more
summary(reg.fit)