#################################################### ## You can copy and paste this code into your R ## ## GUI to reproduce the simulation results in ## ## the paper, "Spike-and-Slab Meets Lasso: A ## ## Review of the Spike-and-Slab Lasso" by Ray ## ## Bai, Veronika Rockova, and Edward I. George ## #################################################### ## First specify the packages of interest packages = c("SSLASSO", "mvtnorm", "ncvreg") ## Now load or install & load all package.check <- lapply( packages, FUN = function(x) { if (!require(x, character.only = TRUE)) { install.packages(x, dependencies = TRUE) library(x, character.only = TRUE) } } ) ## Set seed set.seed(12345) ################ ## Parameters ## ################ # p: number of predictors # n: number of observations # Y: response, n x 1 vector # X: predictors, n x p matrix # beta: regression coefficients, p x 1 vector # sigma_true: true error standard deviation # q: number of non-zero beta # Sigma: covariance of predictors, X # rho: correlation in blocks of Sigma # n.rep: number of replications p <- 1000 n <- 100 q <- 6 rho <- 0.9 block <- matrix(rho, 50, 50) Sigma <- diag(20)%x%block diag(Sigma) <- 1 sigma_true <- sqrt(3) n.rep <- 500 ## Vectors to hold results ssl.pe <- rep(0, n.rep) lasso.pe <- rep(0, n.rep) scad.pe <- rep(0, n.rep) mcp.pe <- rep(0, n.rep) ## Model size ssl.size <- rep(0, n.rep) lasso.size <- rep(0, n.rep) scad.size <- rep(0, n.rep) mcp.size <- rep(0, n.rep) ssl.fdr <- rep(0, n.rep) lasso.fdr <- rep(0, n.rep) scad.fdr <- rep(0, n.rep) mcp.fdr <- rep(0, n.rep) ssl.fnr <- rep(0, n.rep) lasso.fnr <- rep(0, n.rep) scad.fnr <- rep(0, n.rep) mcp.fnr <- rep(0, n.rep) ssl.mcc <- rep(0, n.rep) lasso.mcc <- rep(0, n.rep) scad.mcc <- rep(0, n.rep) mcp.mcc <- rep(0, n.rep) ################################ ## Fit models and get results ## ## for 500 replications ## ################################ for(i in 1:n.rep){ ## Generate data X <- rmvnorm(n, numeric(p), Sigma) index <- c(1, 51, 101, 151, 201, 251) beta <- numeric(p) beta[index] <- c(-2.5,-2,-1.5,1.5,2,2.5) y <- X %*% beta + rnorm(n, sd=sigma_true) ########################## ## Spike-and-slab lasso ## ########################## ssl.mod <- SSLASSO(X, y, variance = "unknown") beta.hat <- ssl.mod$beta[,ncol(ssl.mod$beta)] ## Prediction error and model size ssl.pe[i] <- sum((X%*%(beta.hat-beta))^2) ssl.size[i] <- length(which(beta.hat != 0)) ## TP, TN, FP, FN tp = length(which(beta != 0 & beta.hat != 0)) tn = length(which(beta == 0 & beta.hat == 0)) fp = length(which(beta == 0 & beta.hat != 0)) fn = length(which(beta != 0 & beta.hat == 0)) ## FDR, FNR, and MCC ssl.fdr[i] <- fp/(fp+tn) ssl.fnr[i] <- fn/(fn+tp) ssl.mcc[i] <- (tp*tn - fp*fn)/(sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))) ########### ## lasso ## ########### cv.lasso <- cv.ncvreg(X, y, family=c("gaussian"), penalty="lasso", warn=FALSE) lasso.mod <- cv.lasso$fit beta.hat <- lasso.mod$beta[-1, cv.lasso$min] ## Prediction error and model size lasso.pe[i] <- sum((X%*%(beta.hat-beta))^2) lasso.size[i] <- length(which(beta.hat != 0)) ## TP, TN, FP, FN tp = length(which(beta != 0 & beta.hat != 0)) tn = length(which(beta == 0 & beta.hat == 0)) fp = length(which(beta == 0 & beta.hat != 0)) fn = length(which(beta != 0 & beta.hat == 0)) ## FDR, FNR, and MCC lasso.fdr[i] <- fp/(fp+tn) lasso.fnr[i] <- fn/(fn+tp) lasso.mcc[i] <- (tp*tn - fp*fn)/(sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))) ########## ## SCAD ## ########## cv.scad <- cv.ncvreg(X, y, family=c("gaussian"), penalty="SCAD", warn=FALSE) scad.mod <- cv.scad$fit beta.hat <- scad.mod$beta[-1, cv.scad$min] ## Prediction error and model size scad.pe[i] <- sum((X%*%(beta.hat-beta))^2) scad.size[i] <- length(which(beta.hat != 0)) ## TP, TN, FP, FN tp = length(which(beta != 0 & beta.hat != 0)) tn = length(which(beta == 0 & beta.hat == 0)) fp = length(which(beta == 0 & beta.hat != 0)) fn = length(which(beta != 0 & beta.hat == 0)) ## FDR, FNR, and MCC scad.fdr[i] <- fp/(fp+tn) scad.fnr[i] <- fn/(fn+tp) scad.mcc[i] <- (tp*tn - fp*fn)/(sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))) ######### ## MCP ## ######### cv.mcp <- cv.ncvreg(X, y, family=c("gaussian"), penalty="MCP", warn=FALSE) mcp.mod <- cv.mcp$fit beta.hat <- mcp.mod$beta[-1, cv.mcp$min] ## Prediction error and model size mcp.pe[i] <- sum((X%*%(beta.hat-beta))^2) mcp.size[i] <- length(which(beta.hat != 0)) ## TP, TN, FP, FN tp = length(which(beta != 0 & beta.hat != 0)) tn = length(which(beta == 0 & beta.hat == 0)) fp = length(which(beta == 0 & beta.hat != 0)) fn = length(which(beta != 0 & beta.hat == 0)) ## FDR, FNR, and MCC mcp.fdr[i] <- fp/(fp+tn) mcp.fnr[i] <- fn/(fn+tp) mcp.mcc[i] <- (tp*tn - fp*fn)/(sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))) } ## Display the mean results methods <- c("SS-LASSO", "LASSO", "SCAD", "MCP") metrics <- c("MPE", "Size", "FDR", "FNR", "MCC") results.table <- matrix(0, nrow=4, ncol=5) results.table[, 1] <- c(mean(ssl.pe/n), mean(lasso.pe/n), mean(scad.pe/n), mean(mcp.pe/n)) results.table[, 2] <- c(mean(ssl.size), mean(lasso.size), mean(scad.size), mean(mcp.size)) results.table[, 3] <- c(mean(ssl.fdr), mean(lasso.fdr), mean(scad.fdr), mean(mcp.fdr)) results.table[, 4] <- c(mean(ssl.fnr), mean(lasso.fnr), mean(scad.fnr), mean(mcp.fnr)) results.table[, 5] <- c(mean(ssl.mcc), mean(lasso.mcc), mean(scad.mcc), mean(mcp.mcc)) results.table <- as.data.frame(results.table) rownames(results.table) <- methods names(results.table) <- metrics ## Display results results.table ################################### ## Run a single simulation to ## ## illustrate the solution paths ## ################################### set.seed(123) ## Generate data X <- rmvnorm(n, numeric(p), Sigma) index <- c(1, 51, 101, 151, 201, 251) beta <- numeric(p) beta[index] <- c(-2.5,-2,-1.5,1.5,2,2.5) y <- X %*% beta + rnorm(n, sd=sigma_true) ########################## ## Spike-and-slab lasso ## ########################## ssl.mod <- SSLASSO(X, y, variance = "unknown") plot(ssl.mod)