###### Estimating Democratic Mood, 1988-2020 ###### Running Stan Models Using cmdstanr ###### v6 # libraries library(arm) library(dplyr) library(tidyr) library(loo) library(ggplot2) library(bayesplot) library(cmdstanr) # options options("cmdstanr_verbose" = TRUE) options(mc.cores = parallel::detectCores()) # folders WD = rstudioapi::getActiveDocumentContext()$path setwd(dirname(WD)) print( getwd() ) # read support for dem data supdem = read.csv("Support for democracy raw 0522.csv") ## Edit data # remove NAs supdem = supdem[!supdem$Response==0, ] # order supdem = arrange(supdem, Country, Year) # create vector of response counts if they do not exist supdem$RespN = round(supdem$Response*supdem$Sample) # set first year year0 = 1987 # = year before first year of available survey data supdem = supdem[supdem$Year > year0,] # create item by country indicators supdem = unite(supdem, ItemCnt, c(Item, Country), sep = "_", remove = FALSE) sddem1 = supdem # identify countries with few years of data cnt.obs.years = rowSums(table(sddem1$Country, sddem1$Year) > 0) sort(cnt.obs.years) # run the next line to drop countries with less than 2 years of data sddem2 = sddem1[sddem1$Country %in% levels(factor(sddem1$Country))[cnt.obs.years > 1], ] length(unique(sddem2$Country)) # 144 countries with 2+ years of data ## Prepare data for stan # factorise sddem2$Country = as.factor(as.character(sddem2$Country)) sddem2$Item = as.factor(as.character(sddem2$Item)) sddem2$ItemCnt = as.factor(as.character(sddem2$ItemCnt)) sddem2$Project = as.factor(as.character(sddem2$Project)) sddem2$Year = sddem2$Year-year0 # extract data n.items = length(unique(sddem2$Item)) n.cntrys = length(unique(sddem2$Country)) n.yrs = 2020-year0 # estimates up to 2020 n.resp = dim(sddem2)[1] n.itm.cnt = length(unique(sddem2$ItemCnt)) cntrys = as.numeric(factor(sddem2$Country)) cnt.names = levels(sddem2$Country) items = as.numeric(factor(sddem2$Item)) yrs = sddem2$Year itm.cnts = as.numeric(factor(sddem2$ItemCnt)) mean.resp.log = logit(mean(sddem2$Response)) # create item-country length indicator for items item.ind.kp = rep(0, length(levels(sddem2$ItemCnt))) for(i in 1:length(levels(sddem2$Item))) { item.ind.kp[grepl(levels(sddem2$Item)[i], levels(sddem2$ItemCnt))] = i } item.ind.len = sapply(lapply(levels(sddem2$Item), function(x) grep(x, levels(sddem2$ItemCnt))), length) ## Fit stan model # specify data for stan dat.1 = list(N=n.resp, K=n.items, T=n.yrs, J=n.cntrys, P=n.itm.cnt, jj=cntrys, tt=yrs, pp=itm.cnts, kk=items, it_len=item.ind.len, x=sddem2$RespN, samp=sddem2$Sample, mn_resp_log=mean.resp.log) sapply(dat.1, summary) # parameters to save pars.1 = c("Sigma","Omega","sigma_delta","sigma_theta","phi","mu_lambda","lambda","gamm","delta", "theta","x_pred","log_lik") # iterations for MCMC simulation n.iter = 1000 n.warm = 500 n.samp = n.iter - n.warm n.chn = 4 # compile model stan.mod = cmdstan_model('stan.mod6.7.stan') # Stan fit fit.mod= stan.mod$sample( data = dat.1, chains = n.chn, init = 1, parallel_chains = n.chn, iter_warmup = n.warm, iter_sampling = n.samp, refresh = round(n.iter/20, 0), adapt_delta = 0.90, max_treedepth = 13, save_warmup = FALSE ) ## Check convergence # Examine model fit res = fit.mod res$cmdstan_diagnose() res.tab = res$print(pars.1, max_rows=80, digits=3) sum = res$summary(pars.1) print(sum[order(sum$rhat, decreasing=TRUE), ], n=50) res_neff_ratio = neff_ratio(res) res_neff_ratio[order(res_neff_ratio, decreasing=FALSE)][1:50] # traceplot tp.pars = c("Sigma[1,1]","Sigma[2,2]","Omega[1,2]","sigma_theta","sigma_delta","mu_lambda", "phi","lambda[31]","delta[23]","gamm[31]","theta[26,132]","theta[31,121]") tp = bayesplot::mcmc_trace(res$draws(tp.pars), size=0.3, np=nuts_params(res)) tp ## Extract and save mood estimates theta.m.out = apply(res$draws("theta"), 3, as.vector) (theta.m.mean = mean(as.vector(theta.m.out))) (theta.m.sd = sd(as.vector(theta.m.out))) theta.m.std = (theta.m.out - theta.m.mean) / theta.m.sd # standardize theta.m.t = apply(theta.m.std, 1, function(x) t(x) ) theta.pe = apply(theta.m.t, 1, mean) theta.u95 = apply(theta.m.t, 1, quantile, probs=c(0.975)) theta.l95 = apply(theta.m.t, 1, quantile, probs=c(0.025)) theta.sd = apply(theta.m.t, 1, sd) theta.m.df = data.frame(Country=rep(cnt.names, each=n.yrs), Year=rep(1988:2020, times=n.cntrys), SupDem=theta.pe, SupDem_u95=theta.u95, SupDem_l95=theta.l95, SupDem_sd=theta.sd) # save country-year point estimates write.csv(theta.trim, "mood_est.csv", row.names=FALSE)