#################################################### ## You can copy and paste this code into your R ## ## GUI to reproduce the simulation results in ## ## the paper, "Spike-and-Slab Meets Lasso: A ## ## Review of the Spike-and-Slab Lasso" by Ray ## ## Bai, Veronika Rockova, and Edward I. George ## #################################################### ## First specify the packages of interest packages = c("SSLASSO", "mvtnorm", "ncvreg") ## Now load or install & load all package.check <- lapply( packages, FUN = function(x) { if (!require(x, character.only = TRUE)) { install.packages(x, dependencies = TRUE) library(x, character.only = TRUE) } } ) ## Set seed set.seed(12345) ################ ## Parameters ## ################ # p: number of predictors # n: number of observations # Y: response, n x 1 vector # X: predictors, n x p matrix # beta: regression coefficients, p x 1 vector # sigma_true: true error standard deviation # q: number of non-zero beta # Sigma: covariance of predictors, X # rho: correlation in blocks of Sigma # n.rep: number of replications p <- 1000 n <- 100 q <- 6 rho <- 0.9 block <- matrix(rho, 50, 50) Sigma <- diag(20)%x%block diag(Sigma) <- 1 sigma_true <- sqrt(3) n.rep <- 500 ## Results for MSE ssl.mse <- rep(0, n.rep) lasso.mse <- rep(0, n.rep) scad.mse <- rep(0, n.rep) mcp.mse <- rep(0, n.rep) ## Results for MPE ssl.mpe <- rep(0, n.rep) lasso.mpe <- rep(0, n.rep) scad.mpe <- rep(0, n.rep) mcp.mpe <- rep(0, n.rep) ## Results for model size ssl.size <- rep(0, n.rep) lasso.size <- rep(0, n.rep) scad.size <- rep(0, n.rep) mcp.size <- rep(0, n.rep) ## Results for FDR ssl.fdr <- rep(0, n.rep) lasso.fdr <- rep(0, n.rep) scad.fdr <- rep(0, n.rep) mcp.fdr <- rep(0, n.rep) ## Results for FNR ssl.fnr <- rep(0, n.rep) lasso.fnr <- rep(0, n.rep) scad.fnr <- rep(0, n.rep) mcp.fnr <- rep(0, n.rep) ## Results for MCC ssl.mcc <- rep(0, n.rep) lasso.mcc <- rep(0, n.rep) scad.mcc <- rep(0, n.rep) mcp.mcc <- rep(0, n.rep) ################################ ## Fit models and get results ## ## for 500 replications ## ################################ for(i in 1:n.rep){ ## Generate data X <- rmvnorm(n, numeric(p), Sigma) index <- c(1, 51, 101, 151, 201, 251) neg_index <- (1:p)[-c(index)] beta <- numeric(p) beta[index] <- c(-3.5,-2.5,-1.5,1.5,2.5,3.5) y <- X %*% beta + rnorm(n, sd=sigma_true) ########################## ## Spike-and-slab lasso ## ########################## ssl.mod <- SSLASSO(X, y, variance = "unknown") beta.hat <- ssl.mod$beta[,ncol(ssl.mod$beta)] ## MSE, MPE, and model size ssl.mse[i] <- (1/p)*sum((beta.hat-beta)^2) ssl.mpe[i] <- (1/n)*sum((X%*%(beta.hat-beta))^2) ssl.size[i] <- length(which(beta.hat != 0)) ## TP, TN, FP, FN tp = length(which(beta != 0 & beta.hat != 0)) tn = length(which(beta == 0 & beta.hat == 0)) fp = length(which(beta == 0 & beta.hat != 0)) fn = length(which(beta != 0 & beta.hat == 0)) ## FDR, FNR, and MCC ssl.fdr[i] <- fp/(tn+fp) ssl.fnr[i] <- fn/(tp+fn) ssl.mcc[i] <- (tp*tn - fp*fn)/(sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))) ########### ## lasso ## ########### cv.lasso <- cv.ncvreg(X, y, family=c("gaussian"), penalty="lasso", warn=FALSE) lasso.mod <- cv.lasso$fit beta.hat <- lasso.mod$beta[-1, cv.lasso$min] ## Prediction error and model size lasso.mse[i] <- (1/p)*sum((beta.hat-beta)^2) lasso.mpe[i] <- (1/n)*sum((X%*%(beta.hat-beta))^2) lasso.size[i] <- length(which(beta.hat != 0)) ## TP, TN, FP, FN tp = length(which(beta != 0 & beta.hat != 0)) tn = length(which(beta == 0 & beta.hat == 0)) fp = length(which(beta == 0 & beta.hat != 0)) fn = length(which(beta != 0 & beta.hat == 0)) ## FDR, FNR, and MCC lasso.fdr[i] <- fp/(tn+fp) lasso.fnr[i] <- fn/(tp+fn) lasso.mcc[i] <- (tp*tn - fp*fn)/(sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))) ########## ## SCAD ## ########## cv.scad <- cv.ncvreg(X, y, family=c("gaussian"), penalty="SCAD", warn=FALSE) scad.mod <- cv.scad$fit beta.hat <- scad.mod$beta[-1, cv.scad$min] ## Prediction error and model size scad.mse[i] <- (1/p)*sum((beta-beta.hat)^2) scad.mpe[i] <- (1/n)*sum((X%*%(beta.hat-beta))^2) scad.size[i] <- length(which(beta.hat != 0)) ## TP, TN, FP, FN tp = length(which(beta != 0 & beta.hat != 0)) tn = length(which(beta == 0 & beta.hat == 0)) fp = length(which(beta == 0 & beta.hat != 0)) fn = length(which(beta != 0 & beta.hat == 0)) ## FDR, FNR, and MCC scad.fdr[i] <- fp/(tn+fp) scad.fnr[i] <- fn/(tp+fn) scad.mcc[i] <- (tp*tn - fp*fn)/(sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))) ######### ## MCP ## ######### cv.mcp <- cv.ncvreg(X, y, family=c("gaussian"), penalty="MCP", warn=FALSE) mcp.mod <- cv.mcp$fit beta.hat <- mcp.mod$beta[-1, cv.mcp$min] ## Prediction error and model size mcp.mse[i] <- (1/p)*sum((beta-beta.hat)^2) mcp.mpe[i] <- (1/n)*sum((X%*%(beta.hat-beta))^2) mcp.size[i] <- length(which(beta.hat != 0)) ## TP, TN, FP, FN tp = length(which(beta != 0 & beta.hat != 0)) tn = length(which(beta == 0 & beta.hat == 0)) fp = length(which(beta == 0 & beta.hat != 0)) fn = length(which(beta != 0 & beta.hat == 0)) ## FDR, FNR, and MCC mcp.fdr[i] <- fp/(tn+fp) mcp.fnr[i] <- fn/(tp+fn) mcp.mcc[i] <- (tp*tn - fp*fn)/(sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))) } ## Display the mean results methods <- c("SS-LASSO", "LASSO", "SCAD", "MCP") metrics <- c("MSE", "MPE", "Size", "FDR", "FNR", "MCC") results.table <- matrix(0, nrow=4, ncol=6) results.table[, 1] <- c(mean(ssl.mse), mean(lasso.mse), mean(scad.mse), mean(mcp.mse)) results.table[, 2] <- c(mean(ssl.mpe), mean(lasso.mpe), mean(scad.mpe), mean(mcp.mpe)) results.table[, 3] <- c(mean(ssl.size), mean(lasso.size), mean(scad.size), mean(mcp.size)) results.table[, 4] <- c(mean(ssl.fdr), mean(lasso.fdr), mean(scad.fdr), mean(mcp.fdr)) results.table[, 5] <- c(mean(ssl.fnr), mean(lasso.fnr), mean(scad.fnr), mean(mcp.fnr)) results.table[, 6] <- c(mean(ssl.mcc), mean(lasso.mcc), mean(scad.mcc), mean(mcp.mcc)) results.table <- as.data.frame(results.table) rownames(results.table) <- methods names(results.table) <- metrics ## Display results results.table ## Empirical standard errors se.table <- matrix(0, nrow=4, ncol=6) se.table[, 1] <- c(sd(ssl.mse), sd(lasso.mse), sd(scad.mse), sd(mcp.mse)) se.table[ ,2] <- c(sd(ssl.mpe), sd(lasso.mpe), sd(scad.mpe), sd(mcp.mpe)) se.table[, 3] <- c(sd(ssl.size), sd(lasso.size), sd(scad.size), sd(mcp.size)) se.table[, 4] <- c(sd(ssl.fdr), sd(lasso.fdr), sd(scad.fdr), sd(mcp.fdr)) se.table[, 5] <- c(sd(ssl.fnr), sd(lasso.fnr), sd(scad.fnr), sd(mcp.fnr)) se.table[, 6] <- c(sd(ssl.mcc), sd(lasso.mcc), sd(scad.mcc), sd(mcp.mcc)) se.table <- as.data.frame(se.table) rownames(se.table) <- methods names(se.table) <- metrics ## Display standard errors se.table