############################################################################################
# REPLICATION SCRIPT (R)
# Paper:   "Meta-Analysis of Field Studies on Beauty and Professional Success"
# Authors: Z. Irsova, T. Havranek, K. Bortnikova, F. Bartoš
#
# Purpose:
#   Replicate all key R-based analyses:
#     (A) Publication-bias corrections on beauty_R.xlsx:
#         - Right-Truncated Meta-Analysis (Mathur)
#         - STEM-based method (Furukawa)
#         - Selection model (Andrews & Kasy)
#     (B) RoBMA on beauty.xlsx (full set of models and subgroups)
#     (C) Bayesian Model Averaging (BMA) with 'BMS' on beauty_R.xlsx
#     (D) Best-practice (BP) predictions from BMA posteriors
#     (E) Occupation-weighted representative estimate (corrected pooled CI)
#
# Data requirements:
#   - beauty_R.xlsx  (created by beauty.do: export for BMA / publication-bias analyses)
#   - beauty.xlsx    (main meta-analysis dataset with sheet "data" for RoBMA)
#
# Software requirements:
#   - R (version >= 4.x recommended)
#   - Packages:
#       phacking      (Mathur RTMA)
#       BMS           (BMA)
#       readxl
#       metastudies   (Andrews & Kasy)
#       ggplot2       (STEM figures)
#       data.table    (STEM data_median)
#       RoBMA         (RoBMA / NoBMA)
#       metafor       (prior scaling)
#       job           (parallel jobs for RoBMA fits)
#
# Outline of sections:
#   1. Load Required Packages and Datasets
#        - Load all required packages.
#        - Detect script directory (rstudioapi or getwd()).
#        - Read beauty_R.xlsx (publication bias, BMA, representative estimate).
#        - Read beauty.xlsx (RoBMA).
#
#   2. Right-Truncated Meta-Analysis (RTMA; Mathur) on beauty_R.xlsx
#        - Helper run_rtma(): RTMA on arbitrary subsample and (y, se).
#        - RTMA for:
#            * Full sample
#            * Sex workers only / No sex workers
#            * Penalties only / No penalties
#            * Cognitive measured OR quasi-experimental
#            * Standardized effect (premium_sd_w, se_premium_sd_w)
#
#   3. STEM-Based Method (Furukawa) on beauty_R.xlsx
#        - Implement STEM algorithm:
#            * stem(), stem_converge(), stem_compute()
#            * variance_b(), variance_0(), weighted_mean(), weighted_mean_squared()
#        - Plotting helpers:
#            * se_rescale(), stem_funnel(), stem_MSE()
#        - Study-level median aggregation: data_median()
#        - STEM estimates for the same subsamples as RTMA.
#
#   4. Andrews and Kasy metastudies model on beauty_R.xlsx
#        - Helper run_ak_subsample(): metastudies_estimation() + bootstrap_specification_tests().
#        - Apply to:
#            * Full sample
#            * Sex workers only / No sex workers
#            * Penalties only / No penalties
#            * Cognitive measured OR quasi-experimental
#            * Standardized effect
#
#   5. RoBMA on beauty.xlsx
#        - Winsorization of premium and se (level and standardized effects).
#        - Construction of subgroup and adjustment datasets (industry, occupation,
#          gender of rated, facing customer, output measurability, interaction intensity,
#          cognition, salary, no prostitutes, scalability, etc.).
#        - Prior scaling for RoBMA (metafor + RoBMA:::scale_d2z).
#        - Main RoBMA and NoBMA models (2-level and 3-level, including no-prostitutes
#          and scalability SD variants), fitted via job::job().
#        - RoBMA subgroup models (industry, occupation, gender, customer contact,
#          output measurability, interaction intensity, cognition, salary).
#        - Adjusted RoBMA/NoBMA models with covariates (prostitutes, quasi_OR_control).
#        - Reload all fit_*.RDS and fit_adj_*.RDS objects.
#        - Summary helpers:
#            * add_row(): extract pooled estimates, tau, Bayes factors, marginal summaries.
#            * out1–out4: write CSV tables (main/subgroup, unadjusted/adjusted).
#        - Z-curve comparison plots (unadjusted and adjusted) exported as PDFs.
#        - sessionInfo() at the end of Section 5.
#
#   6. Bayesian Model Averaging (BMS)
#        - Relabel columns in databeauty to readable names.
#        - Select BMA variables and drop missing values.
#        - BMS models:
#            * beauty1: UIP + dilution prior (benchmark).
#            * beauty2: BRIC g-prior + random model prior.
#            * beauty3: UIP + dilution prior, excluding beauty penalties.
#        - Standard BMS outputs (coef, image, summary, plot, top models).
#
#   7. Best-practice (BP) predictions from benchmark model
#        - Extract posterior means from beauty1 (coef()).
#        - Define baseline Best-Practice profile (bp).
#        - Helper bp_predict(): linear prediction for given profile.
#        - Compute:
#            * Baseline BP estimate
#            * Subgroup BP predictions:
#                - Athletes, Politicians, Prostitutes, Students,
#                  Teachers & Scientists, Male/Female subjects, Earnings
#                - Low / Mid / High interpersonal intensity
#                - Low / Mid / High output measurability
#        - Print bp_results table.
#
#   8. Occupation-weighted representative estimate (corrected pooled CI)
#        - get_corrected_pooled_ci(): mean-corrected split-normal pooling with
#          simulation-based CI using occupation weights.
#        - run_representative_estimate(): median-aggregate at study level (data_median),
#          map occupation weights, compute corrected pooled mean and 95% CrI.
#        - repres_full: representative, occupation-weighted beauty premium
#          using employment shares.
#
# The script assumes both .xlsx files are in the same directory as this script.
############################################################################################


############################################################################################
# SECTION 1: Load Required Packages and Datasets
############################################################################################

# ---- 1.1 Load packages ----
required_pkgs <- c(
  "phacking", "BMS", "readxl", "metastudies",
  "ggplot2", "data.table", "RoBMA", "metafor", "job"
)

for (p in required_pkgs) {
  if (!requireNamespace(p, quietly = TRUE)) {
    stop(sprintf("Package '%s' is not installed. Please install it before running the script.", p))
  }
}
library(phacking)
library(BMS)
library(readxl)
library(metastudies)
library(ggplot2)
library(data.table)
library(RoBMA)
library(metafor)
library(job)

# 'rstudioapi' is optional: use it only if available, otherwise fall back to getwd()
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
} else {
  script_dir <- getwd()
}

# ---- 1.2 Load beauty_R.xlsx for publication bias + BMA + representative estimate ----
data_path_R <- file.path(script_dir, "beauty_R.xlsx")
if (!file.exists(data_path_R)) {
  stop(sprintf("File 'beauty_R.xlsx' not found at: %s", data_path_R))
}

databeauty <- readxl::read_excel(data_path_R)

cat("\n============================================================\n")
cat("Dataset 'beauty_R.xlsx' loaded from:", data_path_R, "\n")
cat("Number of rows:", nrow(databeauty), "\n")
cat("Number of columns:", ncol(databeauty), "\n\n")
str(databeauty)

# ---- 1.3 Load beauty.xlsx for RoBMA ----
data_path_robma <- file.path(script_dir, "beauty.xlsx")
if (!file.exists(data_path_robma)) {
  stop(sprintf("File 'beauty.xlsx' not found at: %s", data_path_robma))
}

df <- readxl::read_excel(data_path_robma, sheet = "data")

cat("\n============================================================\n")
cat("Dataset 'beauty.xlsx' (RoBMA) loaded from:", data_path_robma, "\n")
cat("Number of rows:", nrow(df), "\n")
cat("Number of columns:", ncol(df), "\n\n")
str(df)


############################################################################################
# SECTION 2: Right-Truncated Meta-Analysis (RTMA; Mathur) on beauty_R.xlsx
############################################################################################

# Helper to run RTMA on a given (sub)sample and (y, se) variables
run_rtma <- function(data, y_var, se_var,
                     subset_idx = NULL,
                     label = "") {
  
  if (is.null(subset_idx)) {
    subset_idx <- rep(TRUE, nrow(data))
  }
  
  ok <- subset_idx & !is.na(data[[y_var]]) & !is.na(data[[se_var]])
  d  <- data[ok, ]
  
  cat("\n============================================================\n")
  cat("RTMA for:", label, "\n")
  cat("Observations used:", nrow(d), "\n")
  
  if (nrow(d) == 0) {
    cat("No observations in this subsample. Skipping.\n")
    return(invisible(NULL))
  }
  
  yi  <- d[[y_var]]
  sei <- d[[se_var]]
  vi  <- sei^2
  
  # Z-score density plot
  plot.new()
  z_density(
    yi, vi,
    sei          = sei,
    alpha_select = 0.05,
    crit_color   = "red"
  )
  
  # Nonaffirmative (insignificant) count
  z_scores       <- yi / sei
  nonaffirmative <- abs(z_scores) < 1.96
  num_insig      <- sum(nonaffirmative)
  total          <- length(z_scores)
  prop_insig     <- round(100 * num_insig / total, 1)
  
  cat(sprintf("Nonaffirmative (insignificant) estimates: %d of %d (%.1f%%)\n",
              num_insig, total, prop_insig))
  
  # RTMA via phacking_meta()
  fit <- phacking_meta(
    yi             = yi,
    vi             = vi,
    favor_positive = TRUE,      # beauty premium expected to be positive
    alpha_select   = 0.05,
    ci_level       = 0.95,
    stan_control   = list(adapt_delta = 0.98, max_treedepth = 20),
    parallelize    = TRUE
  )
  
  print(summary(fit))
  invisible(fit)
}

# ---- 2.1 RTMA runs for requested subsamples (beauty_R.xlsx) ----

# 0) Full sample
rtma_full <- run_rtma(
  data   = databeauty,
  y_var  = "premium_w",
  se_var = "se_premium_w",
  label  = "Full sample"
)

# 1) Sex workers only
rtma_sexworkers_only <- run_rtma(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$prostitutes == 1),
  label      = "Sex workers"
)

# 2) No sex workers
rtma_no_sexworkers <- run_rtma(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$prostitutes == 0),
  label      = "No sex workers"
)

# 3) Penalties only
rtma_penalties_only <- run_rtma(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$beauty_penalty == 1),
  label      = "Penalties only"
)

# 4) No penalties
rtma_no_penalties <- run_rtma(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$beauty_penalty == 0),
  label      = "No penalties"
)

# 5) Cognitive measured OR quasi-experimental
rtma_cog_or_quasi <- run_rtma(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$cognitive_measured == 1 |
                  databeauty$quasi_experimental_method == 1),
  label      = "Cognitive measured or quasi-experimental"
)

# 6) Standardized effect
rtma_std_full <- run_rtma(
  data   = databeauty,
  y_var  = "premium_sd_w",
  se_var = "se_premium_sd_w",
  label  = "Standardized effect"
)


############################################################################################
# SECTION 3: STEM-Based Method (Furukawa) on beauty_R.xlsx 
############################################################################################

## 3.0 STEM parameters
tolerance    <- 10^(-4)  # convergence tolerance
max_N_count  <- 10^3     # max number of iterations
param        <- c(tolerance, max_N_count)

## 3.1 Outer algorithm: stem and stem_converge

stem <- function(beta, se, param) {
  N_study <- length(beta)
  
  beta_equal        <- mean(beta)
  max_sigma_squared <- variance_0(N_study, beta, se, beta_equal)
  max_sigma         <- sqrt(max_sigma_squared)
  min_sigma         <- 0
  tolerance         <- param[1]
  
  data1       <- cbind(beta, se)
  data_sorted <- data1[order(data1[, 2]), ]
  beta_sorted <- data_sorted[, 1]
  se_sorted   <- data_sorted[, 2]
  
  output_max <- stem_converge(max_sigma, beta_sorted, se_sorted, param)
  output_min <- stem_converge(min_sigma, beta_sorted, se_sorted, param)
  Y_max      <- output_max$estimates
  Y_min      <- output_min$estimates
  
  diff_sigma <- abs(Y_max[3] - Y_min[3])
  if (diff_sigma > (2 * tolerance)) {
    multiple <- 1
  } else {
    multiple <- 0
  }
  
  n_stem   <- Y_max[4]
  sigma0   <- Y_max[3]
  inv_var  <- 1 / (se_sorted^2 + sigma0^2)
  info_in_sample <- sum(inv_var[1:n_stem]) / sum(inv_var)
  
  Y1 <- c(Y_max, multiple, info_in_sample)
  Y2 <- t(Y1)
  Z1 <- output_max$MSE
  Z2 <- t(Z1)
  
  colnames(Y2) <- c(
    "estimate", "se", "sd of total heterogeneity",
    "n_stem", "n_iteration", "multiple", "% info used"
  )
  colnames(Z2) <- c("MSE", "variance", "bias_squared")
  output <- list("estimates" = Y2, "MSE" = Z2)
  return(output)
}

stem_converge <- function(initial_sigma, beta_sorted, se_sorted, param) {
  converged   <- 0
  N_count     <- 0
  tolerance   <- param[1]
  max_N_count <- param[2]
  sigma0      <- initial_sigma
  
  while (converged == 0) {
    output  <- stem_compute(beta_sorted, se_sorted, sigma0)
    Y_stem  <- output$estimates
    sigma   <- Y_stem[3]
    evolution <- abs(sigma0 - sigma)
    N_count <- N_count + 1
    
    if (evolution < tolerance) {
      converged <- 1
    } else if (N_count > max_N_count) {
      converged <- 1
    } else {
      sigma0 <- sigma
    }
  }
  Y      <- c(Y_stem, N_count)
  Z      <- output$MSE
  output <- list("estimates" = Y, "MSE" = Z)
  return(output)
}

## 3.2 Inner algorithm

stem_compute <- function(beta, se, sigma0) {
  
  N_study <- length(beta)
  
  Eb_all            <- weighted_mean(beta, se, sigma0)
  Eb_leave_top_out  <- weighted_mean(beta[2:N_study], se[2:N_study], sigma0)
  Eb_squared        <- weighted_mean_squared(beta[2:N_study], se[2:N_study], sigma0)
  MSE_original      <- Eb_squared - 2 * beta[1] * Eb_leave_top_out
  
  Var_all <- variance_b(se, sigma0)
  
  n_stem_min <- 3
  MSE        <- MSE_original[(n_stem_min - 1):(N_study - 1)]
  Bias       <- MSE - Var_all[n_stem_min:N_study]
  index      <- which.min(MSE)
  n_stem     <- index + (n_stem_min - 1)
  
  beta_stem <- Eb_all[n_stem]
  se_stem   <- Var_all[n_stem]^(0.5)
  var_stem  <- variance_0(N_study, beta, se, beta_stem)
  sigma_stem <- sqrt(var_stem)
  
  Y <- cbind(beta_stem, se_stem, sigma_stem, n_stem)
  Z <- rbind(MSE, Var_all[n_stem_min:N_study], Bias[(n_stem_min - 1):(N_study - 1)])
  output <- list("estimates" = Y, "MSE" = Z)
  return(output)
}

variance_b <- function(se, sigma) {
  N_study <- length(se)
  Y       <- vector(mode = "numeric", length = N_study)
  proportional_weights <- 1 / (se^2 + sigma^2)
  
  for (i in 1:N_study) {
    Y[i] <- 1 / sum(proportional_weights[1:i])
  }
  return(Y)
}

variance_0 <- function(n_stem, beta, se, beta_mean) {
  weights      <- 1 / (se[1:n_stem]^2)
  total_weight <- sum(weights)
  
  Y1  <- (t(weights) %*% (beta[1:n_stem] - beta_mean)^2) - (n_stem - 1)
  Y2  <- total_weight - (t(weights) %*% weights) / total_weight
  var <- pmax(0, Y1 / Y2)
  
  return(var)
}

weighted_mean <- function(beta, se, sigma) {
  N_study <- length(beta)
  Y       <- vector(mode = "numeric", length = N_study)
  
  proportional_weights <- 1 / (se^2 + sigma^2)
  
  for (i in 1:N_study) {
    Y[i] <- beta[1:i] %*% proportional_weights[1:i] / sum(proportional_weights[1:i])
  }
  return(Y)
}

weighted_mean_squared <- function(beta, se, sigma) {
  N <- length(beta)
  Y <- vector(mode = "numeric", length = N)
  
  weights       <- 1 / (se^2 + sigma^2)
  weights_beta  <- weights * beta
  
  W  <- weights %o% weights
  WB <- weights_beta %o% weights_beta
  
  for (i in 2:N) {
    Y1    <- sum(WB[1:i, 1:i]) - sum(weights_beta[1:i]^2)
    Y2    <- sum(W[1:i, 1:i]) - sum(weights[1:i]^2)
    Y[i]  <- Y1 / Y2
  }
  return(Y)
}

## 3.3 Figures for STEM

se_rescale <- function(se) {
  -log10(se)
}

stem_funnel <- function(beta_input, se_input, stem_estimates) {
  b_stem    <- stem_estimates[1]
  SE_b_stem <- stem_estimates[2]
  sigma0    <- stem_estimates[3]
  n_stem    <- stem_estimates[4]
  
  data_input   <- cbind(beta_input, se_input)
  data_sorted  <- data_input[order(data_input[, 2]), ]
  beta_sorted  <- data_sorted[, 1]
  se_sorted    <- data_sorted[, 2]
  cumulative_estimates <- weighted_mean(beta_sorted, se_sorted, sigma0)
  
  t_stat        <- 1.96
  lineswidth    <- 2.5
  filled_diamond <- 18
  points_size   <- 2
  se_axis_min   <- 0
  beta_axis_min <- 0.6
  beta_axis_max <- 1.2
  labNames      <- c("Coefficient ", "Precision ")
  
  se_axis <- se_rescale(se_sorted)
  
  plot.new()
  par(mar = c(4.1, 4.1, 1, 1))
  plot(beta_sorted, se_axis,
       col  = rgb(102, 102, 255, maxColorValue = 255), pch = 1, lwd = 2.5,
       xlim = c(beta_axis_min, beta_axis_max),
       xlab = substitute(paste(name, beta), list(name = labNames[1])),
       ylab = substitute(paste(name, -log(SE)), list(name = labNames[2])))
  lines(cumulative_estimates, se_axis,
        col = rgb(96, 96, 96, maxColorValue = 255), lwd = lineswidth)
  points(b_stem, se_axis[n_stem],
         pch = filled_diamond, col = rgb(0, 0, 153, maxColorValue = 255),
         cex = points_size)
  segments(b_stem, se_axis[1], b_stem, se_axis_min,
           col = rgb(0, 0, 153, maxColorValue = 255), lwd = lineswidth)
  points(b_stem, se_axis[1],
         pch = filled_diamond, col = rgb(255, 128, 0, maxColorValue = 255),
         cex = points_size)
  segments(b_stem - t_stat * SE_b_stem, se_axis[1],
           b_stem + t_stat * SE_b_stem, se_axis[1],
           col = rgb(255, 128, 0, maxColorValue = 255), lwd = lineswidth)
  abline(v = 0, col = rgb(192, 192, 192, maxColorValue = 255),
         lty = 2, lwd = lineswidth)
  
  legend("topleft",
         legend = c(
           "stem-based estimate", "95 confidence interval",
           "cumulative estimate", "minimal precision", "study"
         ),
         col = c(
           rgb(255, 128, 0, maxColorValue = 255),
           rgb(255, 128, 0, maxColorValue = 255),
           rgb(96, 96, 96, maxColorValue = 255),
           rgb(0, 0, 153, maxColorValue = 255),
           rgb(102, 102, 255, maxColorValue = 255)
         ),
         bty    = "n",
         lty    = c(NA, 1, 1, NA, NA),
         lwd    = c(NA, 2, 2, NA, 2.5),
         pch    = c(18, NA, NA, 18, 1),
         pt.cex = 1.8, cex = 0.8,
         horiz  = FALSE, inset = c(0, 0),
         y.intersp = 1.5)
}

stem_MSE <- function(V) {
  MSE          <- V[, 1]
  bias_squared <- V[, 3]
  variance     <- V[, 2]
  N_study      <- nrow(V)
  
  N_min   <- 2
  lineset <- 2.5
  num_study <- (N_min + 1):(N_study + 1)
  
  layout(matrix(c(1, 2, 3, 3), 2, 2, byrow = TRUE))
  plot(num_study, bias_squared[N_min:N_study], type = "l",
       col = "blue", lwd = lineset,
       xlab = "Num of included studies i", ylab = "",
       main = expression(Bias^2 - b[0]^2))
  plot(num_study, variance[N_min:N_study], type = "l",
       col = "blue", lwd = lineset,
       xlab = "Num of included studies i", ylab = "",
       main = expression(Variance))
  plot(num_study, MSE[N_min:N_study], type = "l",
       col = "blue", lwd = lineset,
       xlab = "Num of included studies i", ylab = "",
       main = expression(MSE - b[0]^2))
}

## 3.4 Auxiliary function: median aggregation at study level

data_median <- function(data, id_var, main_var, additional_var) {
  
  complete_data <- na.omit(data)
  
  column_id <- eval((substitute(complete_data[a], list(a = id_var))))
  colnames(column_id)[1] <- "id"
  column_main <- eval((substitute(complete_data[a], list(a = main_var))))
  colnames(column_main)[1] <- "main"
  column_additional <- eval((substitute(complete_data[a], list(a = additional_var))))
  colnames(column_additional)[1] <- "additional"
  
  columns_main_merged       <- merge(column_id, column_main, by = 0, all = TRUE)
  columns_additional_merged <- merge(column_id, column_additional, by = 0, all = TRUE)
  
  median_only <- aggregate(main ~ id, columns_main_merged, median)
  
  median_together <- merge(median_only, columns_main_merged,
                           by.x = "id", by.y = "id")
  median_all <- merge(median_together, columns_additional_merged,
                      by.x = "Row.names", by.y = "Row.names")
  
  median_all$diff_squared <- (median_all$main.x - median_all$main.y)^2
  table_form              <- data.table(median_all)
  median_combined <- table_form[, .SD[which.min(diff_squared)], by = id.x]
  
  median_combined2 <- median_combined[order(median_combined$id.x), ]
  median_combined3 <- median_combined2[, c("id.x", "main.x", "additional")]
  colnames(median_combined3)[1] <- "ID"
  colnames(median_combined3)[2] <- "coefficient"
  colnames(median_combined3)[3] <- "standard_error"
  
  return(median_combined3)
}

## 3.5 STEM calls for requested subsamples (beauty_R.xlsx)

run_stem_subsample <- function(data,
                               y_var,
                               se_var,
                               subset_idx = NULL,
                               label = "",
                               param) {
  
  if (is.null(subset_idx)) {
    subset_idx <- rep(TRUE, nrow(data))
  }
  
  ok   <- subset_idx & !is.na(data[[y_var]]) & !is.na(data[[se_var]])
  dsub <- data[ok, ]
  
  cat("\n============================================================\n")
  cat("STEM results for:", label, "\n")
  cat("Observations before median-aggregation:", nrow(dsub), "\n")
  
  if (nrow(dsub) == 0) {
    cat("No observations in this subsample.\n")
    return(invisible(NULL))
  }
  
  med <- data_median(dsub, "study_id", y_var, se_var)
  cat("Observations after median-aggregation:", nrow(med), "\n")
  
  beta_med <- med$coefficient
  se_med   <- med$standard_error
  
  stem_res <- stem(beta_med, se_med, param)
  
  if (!is.null(stem_res$estimates)) {
    print(stem_res$estimates)
  } else {
    print(stem_res)
  }
  
  invisible(stem_res)
}

# 0) Full sample
stem_full <- run_stem_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = NULL,
  label      = "Full sample",
  param      = param
)

# 1) Sex workers only
stem_sexworkers_only <- run_stem_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$prostitutes == 1),
  label      = "Sex workers only",
  param      = param
)

# 2) No sex workers
stem_no_sexworkers <- run_stem_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$prostitutes == 0),
  label      = "No sex workers",
  param      = param
)

# 3) Penalties only
stem_penalties_only <- run_stem_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$beauty_penalty == 1),
  label      = "Penalties only",
  param      = param
)

# 4) No penalties
stem_no_penalties <- run_stem_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$beauty_penalty == 0),
  label      = "No penalties",
  param      = param
)

# 5) Cognitive measured OR quasi-experimental
stem_cog_or_quasi <- run_stem_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$cognitive_measured == 1 |
                  databeauty$quasi_experimental_method == 1),
  label      = "Cognitive measured OR quasi-experimental",
  param      = param
)

# 6) Standardized effect
stem_std_full <- run_stem_subsample(
  data       = databeauty,
  y_var      = "premium_sd_w",
  se_var     = "se_premium_sd_w",
  subset_idx = NULL,
  label      = "Standardized effect",
  param      = param
)


############################################################################################
# SECTION 4: Andrews and Kasy metastudies model on beauty_R.xlsx
############################################################################################

run_ak_subsample <- function(data,
                             y_var,
                             se_var,
                             subset_idx = NULL,
                             label      = "",
                             B_boot     = 500) {
  
  if (is.null(subset_idx)) {
    subset_idx <- rep(TRUE, nrow(data))
  }
  
  ok   <- subset_idx & !is.na(data[[y_var]]) & !is.na(data[[se_var]])
  dsub <- data[ok, ]
  
  cat("\n============================================================\n")
  cat("Andrews & Kasy metastudies model for:", label, "\n")
  cat("Observations used:", nrow(dsub), "\n")
  
  if (nrow(dsub) == 0) {
    cat("No observations in this subsample.\n")
    return(invisible(NULL))
  }
  
  premium_sample <- data.frame(
    X     = dsub[[y_var]],
    sigma = dsub[[se_var]]
  )
  
  ms <- metastudies_estimation(
    X         = premium_sample$X,
    sigma     = premium_sample$sigma,
    model     = "t",
    cutoffs   = c(-1.96, 0, 1.96),
    symmetric = FALSE
  )
  
  cat("\nEstimation table (ms$est_tab):\n")
  print(ms$est_tab)
  
  cat("\nCorrelations between X and sigma within metastudies:\n")
  print(metastudy_X_sigma_cors(ms))
  
  cat("\nBootstrap specification tests (B =", B_boot, "):\n")
  res_boot <- bootstrap_specification_tests(
    premium_sample$X,
    premium_sample$sigma,
    B = B_boot
  )
  print(res_boot)
  
  invisible(list(ms = ms, bootstrap = res_boot))
}

# ---- 4.1 Andrews & Kasy calls (same subsamples as STEM) ----

ak_full <- run_ak_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = NULL,
  label      = "Full sample"
)

ak_sexworkers_only <- run_ak_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$prostitutes == 1),
  label      = "Sex workers only"
)

ak_no_sexworkers <- run_ak_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$prostitutes == 0),
  label      = "No sex workers"
)

ak_penalties_only <- run_ak_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$beauty_penalty == 1),
  label      = "Penalties only"
)

ak_no_penalties <- run_ak_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$beauty_penalty == 0),
  label      = "No penalties"
)

ak_cog_or_quasi <- run_ak_subsample(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  subset_idx = (databeauty$cognitive_measured == 1 |
                  databeauty$quasi_experimental_method == 1),
  label      = "Cognitive measured OR quasi-experimental"
)

ak_std_full <- run_ak_subsample(
  data       = databeauty,
  y_var      = "premium_sd_w",
  se_var     = "se_premium_sd_w",
  subset_idx = NULL,
  label      = "Standardized effect"
)


############################################################################################
# SECTION 5: RoBMA on beauty.xlsx
############################################################################################
# author: Frantisek Bartos
# contact: f.bartos96@gmail.com

df <- readxl::read_excel("beauty.xlsx", sheet = "data")
head(df)

# 5.1 Winsorization on the RoBMA dataset
winsorize <- function(x, probs = c(0.01, 0.99)) {
  qnt <- quantile(x, probs = probs, na.rm = TRUE)
  x[x < qnt[1]] <- qnt[1]
  x[x > qnt[2]] <- qnt[2]
  return(x)
}

df$premium_w    <- winsorize(df$premium)
df$se_premium_w <- winsorize(df$se_premium)
df$premium_sd_w    <- winsorize(df$premium_sd)
df$se_premium_sd_w <- winsorize(df$se_premium_sd)

# 5.2 Basic diagnostics on RoBMA data
nrow(df)
df$study_id
df$database_id
nrow(df[df$prostitutes == 0,])
nrow(df[!is.na(df$premium_sd_w),])

t(t(sort(table(df$industry), decreasing = T)))
t(t(sort(table(df$occupation), decreasing = T)))
t(t(sort(table(df$gender_of_rated), decreasing = T)))
t(t(sort(table(df$facing_customer), decreasing = T)))
t(t(sort(table(df$output_measurability_dummy), decreasing = T)))
t(t(sort(table(df$interaction_intensity_dummy), decreasing = T)))

t(t(sort(table(df$salary), decreasing = T)))

df_noprostitutes <- df[df$prostitutes == 0,]
df_scalability   <- df[!is.na(df$premium_sd_w),]

t_industry  <- table(df$industry)
df_industry <- df[df$industry %in% names(t_industry)[t_industry > 30],]
df_industry <- df_industry[df_industry$industry != "multiple industries",]
df_industry <- df_industry[df_industry$industry != "education",]
table(df_industry$industry)
df_industry_reg <- data.frame(
  y        = df_industry$premium_w,
  se       = df_industry$se_premium_w,
  industry = df_industry$industry,
  study_id = df_industry$study_id
)

t_occupation  <- table(df$occupation)
df_occupation <- df[df$occupation %in% names(t_occupation)[t_occupation > 30],]
table(df_occupation$occupation)
df_occupation_reg <- data.frame(
  y          = df_occupation$premium_w,
  se         = df_occupation$se_premium_w,
  occupation = df_occupation$occupation,
  study_id   = df_occupation$study_id
)

df_gender_of_rated <- df[df$gender_of_rated != "both",]
table(df_gender_of_rated$gender_of_rated)
df_gender_of_rated_reg <- data.frame(
  y        = df_gender_of_rated$premium_w,
  se       = df_gender_of_rated$se_premium_w,
  gender_of_rated = df_gender_of_rated$gender_of_rated,
  study_id = df_gender_of_rated$study_id
)

df_facing_customer <- df
table(df_facing_customer$facing_customer)
df_facing_customer$facing_customer <- as.factor(df_facing_customer$facing_customer)
df_facing_customer_reg <- data.frame(
  y        = df_facing_customer$premium_w,
  se       = df_facing_customer$se_premium_w,
  facing_customer = df_facing_customer$facing_customer,
  study_id = df_facing_customer$study_id
)

df_output_measurability_dummy <- df
table(df_output_measurability_dummy$output_measurability_dummy)
df_output_measurability_dummy$output_measurability_dummy <- as.factor(df_output_measurability_dummy$output_measurability_dummy)
df_output_measurability_dummy_reg <- data.frame(
  y        = df_output_measurability_dummy$premium_w,
  se       = df_output_measurability_dummy$se_premium_w,
  output_measurability_dummy = df_output_measurability_dummy$output_measurability_dummy,
  study_id = df_output_measurability_dummy$study_id
)

df_interaction_intensity_dummy <- df
table(df_interaction_intensity_dummy$interaction_intensity_dummy)
df_interaction_intensity_dummy$interaction_intensity_dummy <- as.factor(df_interaction_intensity_dummy$interaction_intensity_dummy)
df_interaction_intensity_dummy_reg <- data.frame(
  y        = df_interaction_intensity_dummy$premium_w,
  se       = df_interaction_intensity_dummy$se_premium_w,
  interaction_intensity_dummy = df_interaction_intensity_dummy$interaction_intensity_dummy,
  study_id = df_interaction_intensity_dummy$study_id
)

df_cognition <- df
df_cognition$control <- paste0("cognitive=",df_cognition$cognitive_skill_control,", noncognitive=",df_cognition$noncognitive_skill_control)
table(df_cognition$control)
df_cognition_reg <- data.frame(
  y        = df_cognition$premium_w,
  se       = df_cognition$se_premium_w,
  control  = df_cognition$control,
  study_id = df_cognition$study_id
)

df_cognition2 <- df[df$cognitive_measured == 1, ]
table(df_cognition2$noncognitive_skill_control)
df_cognition2$noncognitive_skill_control <- as.factor(df_cognition2$noncognitive_skill_control)
df_cognition2_reg <- data.frame(
  y        = df_cognition2$premium_w,
  se       = df_cognition2$se_premium_w,
  noncognitive_skill_control = df_cognition2$noncognitive_skill_control,
  study_id = df_cognition2$study_id
)

df_cognition3 <- df[df$cognitive_measured == 1 | df$quasi_experimental_method ==1, ]

df_salary <- df
table(df_salary$salary)
df_salary_reg <- data.frame(
  y        = df_salary$premium_w,
  se       = df_salary$se_premium_w,
  salary   = factor(df_salary$salary),
  study_id = df_salary$study_id
)

# 5.3 Adjustment datasets (RoBMA.reg)
df_adj <- data.frame(
  y                 = df$premium_w,
  se                = df$se_premium_w,
  study_id          = df$study_id,
  database_id       = df$database_id,
  prostitues        = factor(df$prostitutes),
  quasi_OR_control  = factor(as.numeric(df$quasi_experimental_method | df$cognitive_skill_control), levels = c(1, 0))
)

df_adj_noprostitutes <- data.frame(
  y                 = df_noprostitutes$premium_w,
  se                = df_noprostitutes$se_premium_w,
  study_id          = df_noprostitutes$study_id,
  database_id       = df_noprostitutes$database_id,
  quasi_OR_control  = factor(as.numeric(df_noprostitutes$quasi_experimental_method | df_noprostitutes$cognitive_skill_control), levels = c(1, 0))
)

df_adj_scalability <- data.frame(
  y                 = df_scalability$premium_sd_w,
  se                = df_scalability$se_premium_sd,
  study_id          = df_scalability$study_id,
  database_id       = df_scalability$database_id,
  prostitues        = factor(df_scalability$prostitutes),
  quasi_OR_control  = factor(as.numeric(df_scalability$quasi_experimental_method | df_scalability$cognitive_skill_control), levels = c(1, 0))
)

df_adj_industry_reg <- df_industry_reg
df_adj_industry_reg$prostitues <- factor(df_industry$prostitutes)
df_adj_industry_reg$quasi_OR_control <- factor(as.numeric(df_industry$quasi_experimental_method | df_industry$cognitive_skill_control), levels = c(1, 0))

df_adj_occupation_reg <- df_occupation_reg
df_adj_occupation_reg$prostitues <- factor(df_occupation$prostitutes)
df_adj_occupation_reg$quasi_OR_control <- factor(as.numeric(df_occupation$quasi_experimental_method | df_occupation$cognitive_skill_control), levels = c(1, 0))

df_adj_gender_of_rated_reg <- df_gender_of_rated_reg
df_adj_gender_of_rated_reg$prostitues <- factor(df_gender_of_rated$prostitutes)
df_adj_gender_of_rated_reg$quasi_OR_control <- factor(as.numeric(df_gender_of_rated$quasi_experimental_method | df_gender_of_rated$cognitive_skill_control), levels = c(1, 0))

df_adj_facing_customer_reg <- df_facing_customer_reg
df_adj_facing_customer_reg$prostitues <- factor(df_facing_customer$prostitutes)
df_adj_facing_customer_reg$quasi_OR_control <- factor(as.numeric(df_facing_customer$quasi_experimental_method | df_facing_customer$cognitive_skill_control), levels = c(1, 0))

df_adj_output_measurability_dummy_reg <- df_output_measurability_dummy_reg
df_adj_output_measurability_dummy_reg$prostitues <- factor(df_output_measurability_dummy$prostitutes)
df_adj_output_measurability_dummy_reg$quasi_OR_control <- factor(as.numeric(df_output_measurability_dummy$quasi_experimental_method | df_output_measurability_dummy$cognitive_skill_control), levels = c(1, 0))

df_adj_interaction_intensity_dummy_reg <- df_interaction_intensity_dummy_reg
df_adj_interaction_intensity_dummy_reg$prostitues <- factor(df_interaction_intensity_dummy$prostitutes)
df_adj_interaction_intensity_dummy_reg$quasi_OR_control <- factor(as.numeric(df_interaction_intensity_dummy$quasi_experimental_method | df_interaction_intensity_dummy$cognitive_skill_control), levels = c(1, 0))

df_adj_cognition_reg <- df_cognition_reg
df_adj_cognition_reg$prostitues <- factor(df_cognition$prostitutes)
df_adj_cognition_reg$quasi_OR_control <- factor(as.numeric(df_cognition$quasi_experimental_method | df_cognition$cognitive_skill_control), levels = c(1, 0))

df_adj_cognition2_reg <- df_cognition2_reg
df_adj_cognition2_reg$prostitues <- factor(df_cognition2$prostitutes)
df_adj_cognition2_reg$quasi_OR_control <- factor(as.numeric(df_cognition2$quasi_experimental_method | df_cognition2$cognitive_skill_control), levels = c(1, 0))

df_adj_cognition3 <- data.frame(
  y                 = df_cognition3$premium_w,
  se                = df_cognition3$se_premium_w,
  study_id          = df_cognition3$study_id,
  prostitues        = factor(df_cognition3$prostitutes),
  quasi_OR_control  = factor(as.numeric(df_cognition3$quasi_experimental_method | df_cognition3$cognitive_skill_control), levels = c(1, 0))
)

df_adj_salary_reg <- df_salary_reg
df_adj_salary_reg$prostitues <- factor(df_salary$prostitutes)
df_adj_salary_reg$quasi_OR_control <- factor(as.numeric(df_salary$quasi_experimental_method | df_salary$cognitive_skill_control), levels = c(1, 0))

df_adj_occupation_reg <- df_occupation_reg
df_adj_occupation_reg$prostitues <- factor(df_occupation$prostitutes)
df_adj_occupation_reg$quasi_OR_control <- factor(as.numeric(df_occupation$quasi_experimental_method | df_occupation$cognitive_skill_control), levels = c(1, 0))

# 5.4 Prior scaling for RoBMA models
# the prior distributions need to be adjusted because they are specified on standardized effect sizes
fit_scale     <- metafor::rma(yi = premium_w, sei = se_premium_w, data = df, method = "FE")
outcome_scale <- fit_scale$se * sqrt(sum(df$nobs))    # prior scaling factor
prior_scale   <- outcome_scale * RoBMA:::scale_d2z(1) # prior has a scale of 1 on Cohen's d => rescaling proportionally

fit_scale2     <- metafor::rma(yi = premium_sd_w, sei = se_premium_sd_w, data = df_scalability, method = "FE")
outcome_scale2 <- fit_scale2$se * sqrt(sum(df_scalability$nobs))    # prior scaling factor
prior_scale2   <- outcome_scale2 * RoBMA:::scale_d2z(1) # prior has a scale of 1 on Cohen's d => rescaling proportionally

### fit the models
library(RoBMA)

# 5.5 RoBMA main models
job::job({
  fit_robma <- RoBMA(y = df$premium_w, se = df$se_premium_w, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                     adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma, file = "fit_robma.RDS")
})
job::job({
  fit_robma.3lvl <- RoBMA(y = df$premium_w, se = df$se_premium_w, study_ids = df$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                          adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl, file = "fit_robma.3lvl.RDS")
})
job::job({
  fit_robma.3lvl_database <- RoBMA(y = df$premium_w, se = df$se_premium_w, study_ids = df$database_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                   adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_database, file = "fit_robma.3lvl_database.RDS")
})
job::job({
  fit_robma_noprost <- RoBMA(y = df_noprostitutes$premium_w, se = df_noprostitutes$se_premium_w, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                             adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_noprost, file = "fit_robma_noprost.RDS")
})
job::job({
  fit_robma.3lvl_noprost <- RoBMA(y = df_noprostitutes$premium_w, se = df_noprostitutes$se_premium_w, study_ids = df_noprostitutes$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                  adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_noprost, file = "fit_robma.3lvl_noprost.RDS")
})
job::job({
  fit_robma_scalability <- RoBMA(y = df_scalability$premium_sd_w, se = df_scalability$se_premium_sd, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale2,
                                 adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_scalability, file = "fit_robma_scalability.RDS")
})
job::job({
  fit_robma.3lvl_scalability <- RoBMA(y = df_scalability$premium_sd_w, se = df_scalability$se_premium_sd, study_ids = df_scalability$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale2,
                                      adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_scalability, file = "fit_robma.3lvl_scalability.RDS")
})

# 5.6 NoBMA for z-curve comparison
job::job({
  fit_nobma.3lvl <- NoBMA(y = df$premium_w, se = df$se_premium_w, study_ids = df$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                          adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_nobma.3lvl, file = "fit_nobma.3lvl.RDS")
})

# 5.7 RoBMA subgroups (industry, occupation, etc.)
job::job({
  fit_robma_industry <- RoBMA.reg(~ industry, data = df_industry_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                  prior_scale = "none", transformation = "none",
                                  adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_industry, file = "fit_robma_industry.RDS")
})
job::job({
  fit_robma.3lvl_industry <- RoBMA.reg(~ industry, data = df_industry_reg, study_ids = df_industry_reg$study_ids, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                       prior_scale = "none", transformation = "none",,
                                       adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_industry, file = "fit_robma.3lvl_industry.RDS")
})

job::job({
  fit_robma_occupation <- RoBMA.reg(~ occupation, data = df_occupation_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                    prior_scale = "none", transformation = "none",
                                    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_occupation, file = "fit_robma_occupation.RDS")
})
job::job({
  fit_robma.3lvl_occupation <- RoBMA.reg(~ occupation, data = df_occupation_reg, study_ids = df_occupation_reg$study_ids, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                         prior_scale = "none", transformation = "none",,
                                         adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_occupation, file = "fit_robma.3lvl_occupation.RDS")
})

job::job({
  fit_robma_gender_of_rated <- RoBMA.reg(~ gender_of_rated, data = df_gender_of_rated_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                         prior_scale = "none", transformation = "none",
                                         adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_gender_of_rated, file = "fit_robma_gender_of_rated.RDS")
})
job::job({
  fit_robma.3lvl_gender_of_rated <- RoBMA.reg(~ gender_of_rated, data = df_gender_of_rated_reg, study_ids = df_gender_of_rated_reg$study_ids, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                              prior_scale = "none", transformation = "none",,
                                              adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_gender_of_rated, file = "fit_robma.3lvl_gender_of_rated.RDS")
})

job::job({
  fit_robma_facing_customer <- RoBMA.reg(~ facing_customer, data = df_facing_customer_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                         prior_scale = "none", transformation = "none",
                                         adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_facing_customer, file = "fit_robma_facing_customer.RDS")
})
job::job({
  fit_robma.3lvl_facing_customer <- RoBMA.reg(~ facing_customer, data = df_facing_customer_reg, study_ids = df_facing_customer_reg$study_ids, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                              prior_scale = "none", transformation = "none",,
                                              adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_facing_customer, file = "fit_robma.3lvl_facing_customer.RDS")
})

job::job({
  fit_robma_output_measurability_dummy <- RoBMA.reg(~ output_measurability_dummy, data = df_output_measurability_dummy_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                                    prior_scale = "none", transformation = "none",
                                                    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_output_measurability_dummy, file = "fit_robma_output_measurability_dummy.RDS")
})
job::job({
  fit_robma.3lvl_output_measurability_dummy <- RoBMA.reg(~ output_measurability_dummy, data = df_output_measurability_dummy_reg, study_ids = df_output_measurability_dummy_reg$study_ids, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                                         prior_scale = "none", transformation = "none",,
                                                         adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_output_measurability_dummy, file = "fit_robma.3lvl_output_measurability_dummy.RDS")
})

job::job({
  fit_robma_interaction_intensity_dummy <- RoBMA.reg(~ interaction_intensity_dummy, data = df_interaction_intensity_dummy_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                                     prior_scale = "none", transformation = "none",
                                                     adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_interaction_intensity_dummy, file = "fit_robma_interaction_intensity_dummy.RDS")
})
job::job({
  fit_robma.3lvl_interaction_intensity_dummy <- RoBMA.reg(~ interaction_intensity_dummy, data = df_interaction_intensity_dummy_reg, study_ids = df_interaction_intensity_dummy_reg$study_ids, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                                          prior_scale = "none", transformation = "none",,
                                                          adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_interaction_intensity_dummy, file = "fit_robma.3lvl_interaction_intensity_dummy.RDS")
})

job::job({
  fit_robma_cognition <- RoBMA.reg(~ control, data = df_cognition_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                   prior_scale = "none", transformation = "none",
                                   adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_cognition, file = "fit_robma_cognition.RDS")
})
job::job({
  fit_robma.3lvl_cognition <- RoBMA.reg(~ control, data = df_cognition_reg, study_ids = df_cognition_reg$study_ids, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                        prior_scale = "none", transformation = "none",,
                                        adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_cognition, file = "fit_robma.3lvl_cognition.RDS")
})

job::job({
  fit_robma_cognition2 <- RoBMA.reg(~ noncognitive_skill_control, data = df_cognition2_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                    prior_scale = "none", transformation = "none",
                                    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_cognition2, file = "fit_robma_cognition2.RDS")
})
job::job({
  fit_robma.3lvl_cognition2 <- RoBMA.reg(~ noncognitive_skill_control, data = df_cognition2_reg, study_ids = df_cognition2_reg$study_ids, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                         prior_scale = "none", transformation = "none",
                                         adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_cognition2, file = "fit_robma.3lvl_cognition2.RDS")
})

job::job({
  fit_robma_cognition3 <- RoBMA(y = df_cognition3$premium_w, se = df_cognition3$se_premium_w, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_cognition3, file = "fit_robma_cognition3.RDS")
})
job::job({
  fit_robma.3lvl_cognition3 <- RoBMA(y = df_cognition3$premium_w, se = df_cognition3$se_premium_w, study_ids = df_cognition3$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                     adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_cognition3, file = "fit_robma.3lvl_cognition3.RDS")
})

job::job({
  fit_robma_salary <- RoBMA.reg(~ salary, data = df_salary_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                prior_scale = "none", transformation = "none",
                                adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma_salary, file = "fit_robma_salary.RDS")
})
job::job({
  fit_robma.3lvl_salary <- RoBMA.reg(~ salary, data = df_salary_reg, study_ids = df_salary_reg$study_ids, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                     prior_scale = "none", transformation = "none",,
                                     adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1)
  saveRDS(fit_robma.3lvl_salary, file = "fit_robma.3lvl_salary.RDS")
})

### 5.8 Adjusted RoBMA models - main models
job::job({
  fit_adj_robma <- RoBMA.reg(
    ~ prostitues + quasi_OR_control, data = df_adj, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
    prior_scale = "none", transformation = "none",
    priors = list(
      prostitues = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      ),
      quasi_OR_control = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      )
    ),
    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma, file = "fit_adj_robma.RDS")
})
job::job({
  fit_adj_robma.3lvl <- RoBMA.reg(
    ~ prostitues + quasi_OR_control, data = df_adj, study_ids = df_adj$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
    prior_scale = "none", transformation = "none",
    priors = list(
      prostitues = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      ),
      quasi_OR_control = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      )
    ),
    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl, file = "fit_adj_robma.3lvl.RDS")
})
job::job({
  fit_adj_robma.3lvl_database <- RoBMA.reg(
    ~ prostitues + quasi_OR_control, data = df_adj, study_ids = df_adj$database_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
    prior_scale = "none", transformation = "none",
    priors = list(
      prostitues = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      ),
      quasi_OR_control = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      )
    ),
    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_database, file = "fit_adj_robma.3lvl_database.RDS")
})
job::job({
  fit_adj_robma_noprost <- RoBMA.reg(
    ~ quasi_OR_control, data = df_adj_noprostitutes, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
    prior_scale = "none", transformation = "none",
    priors = list(
      quasi_OR_control = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      )
    ),
    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_noprost, file = "fit_adj_robma_noprost.RDS")
})
job::job({
  fit_adj_robma.3lvl_noprost <- RoBMA.reg(
    ~ quasi_OR_control, data = df_adj_noprostitutes, study_ids = df_adj_noprostitutes$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
    prior_scale = "none", transformation = "none",
    priors = list(
      quasi_OR_control = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      )
    ),
    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_noprost, file = "fit_adj_robma.3lvl_noprost.RDS")
})
job::job({
  fit_adj_robma_scalability <- RoBMA.reg(
    ~ prostitues + quasi_OR_control, data = df_adj_scalability, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale2,
    prior_scale = "none", transformation = "none",
    priors = list(
      prostitues = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale2 / 4), contrast = "treatment")
      ),
      quasi_OR_control = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale2 / 4), contrast = "treatment")
      )
    ),
    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_scalability, file = "fit_adj_robma_scalability.RDS")
})
job::job({
  fit_adj_robma.3lvl_scalability <- RoBMA.reg(
    ~ prostitues + quasi_OR_control, data = df_adj_scalability, study_ids = df_adj_scalability$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale2,
    prior_scale = "none", transformation = "none",
    priors = list(
      prostitues = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale2 / 4), contrast = "treatment")
      ),
      quasi_OR_control = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale2 / 4), contrast = "treatment")
      )
    ),
    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_scalability, file = "fit_adj_robma.3lvl_scalability.RDS")
})

 

5.8
job::job({
  fit_adj_robma_industry <- RoBMA.reg(~ industry + prostitues + quasi_OR_control, data = df_adj_industry_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                      prior_scale = "none", transformation = "none",
                                      priors = list(
                                        industry = list(
                                          null = prior_factor("spike", list(0), contrast = "meandif"),
                                          alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                        ),
                                        prostitues = list(
                                          null = prior_factor("spike", list(0), contrast = "treatment"),
                                          alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                        ),
                                        quasi_OR_control = list(
                                          null = prior_factor("spike", list(0), contrast = "treatment"),
                                          alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                        )
                                      ),
                                      adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_industry, file = "fit_adj_robma_industry.RDS")
})
job::job({
  fit_adj_robma.3lvl_industry <- RoBMA.reg(~ industry + prostitues + quasi_OR_control, data = df_adj_industry_reg, study_ids = df_adj_industry_reg$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                           prior_scale = "none", transformation = "none",
                                           priors = list(
                                             industry = list(
                                               null = prior_factor("spike", list(0), contrast = "meandif"),
                                               alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                             ),
                                             prostitues = list(
                                               null = prior_factor("spike", list(0), contrast = "treatment"),
                                               alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                             ),
                                             quasi_OR_control = list(
                                               null = prior_factor("spike", list(0), contrast = "treatment"),
                                               alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                             )
                                           ),
                                           adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_industry, file = "fit_adj_robma.3lvl_industry.RDS")
})

job::job({
  fit_adj_robma_occupation <- RoBMA.reg(~ occupation + prostitues + quasi_OR_control, data = df_adj_occupation_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                        prior_scale = "none", transformation = "none",
                                        priors = list(
                                          occupation = list(
                                            null = prior_factor("spike", list(0), contrast = "meandif"),
                                            alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                          ),
                                          prostitues = list(
                                            null = prior_factor("spike", list(0), contrast = "treatment"),
                                            alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                          ),
                                          quasi_OR_control = list(
                                            null = prior_factor("spike", list(0), contrast = "treatment"),
                                            alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                          )
                                        ),
                                        adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_occupation, file = "fit_adj_robma_occupation.RDS")
})
job::job({
  fit_adj_robma.3lvl_occupation <- RoBMA.reg(~ occupation + prostitues + quasi_OR_control, data = df_adj_occupation_reg, study_ids = df_adj_occupation_reg$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                             prior_scale = "none", transformation = "none",
                                             priors = list(
                                               occupation = list(
                                                 null = prior_factor("spike", list(0), contrast = "meandif"),
                                                 alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                               ),
                                               prostitues = list(
                                                 null = prior_factor("spike", list(0), contrast = "treatment"),
                                                 alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                               ),
                                               quasi_OR_control = list(
                                                 null = prior_factor("spike", list(0), contrast = "treatment"),
                                                 alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                               )
                                             ),
                                             adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_occupation, file = "fit_adj_robma.3lvl_occupation.RDS")
})

job::job({
  fit_adj_robma_gender_of_rated <- RoBMA.reg(~ gender_of_rated + prostitues + quasi_OR_control, data = df_adj_gender_of_rated_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                             prior_scale = "none", transformation = "none",
                                             priors = list(
                                               gender_of_rated = list(
                                                 null = prior_factor("spike", list(0), contrast = "meandif"),
                                                 alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                               ),
                                               prostitues = list(
                                                 null = prior_factor("spike", list(0), contrast = "treatment"),
                                                 alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                               ),
                                               quasi_OR_control = list(
                                                 null = prior_factor("spike", list(0), contrast = "treatment"),
                                                 alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                               )
                                             ),
                                             adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_gender_of_rated, file = "fit_adj_robma_gender_of_rated.RDS")
})
job::job({
  fit_adj_robma.3lvl_gender_of_rated <- RoBMA.reg(~ gender_of_rated + prostitues + quasi_OR_control, data = df_adj_gender_of_rated_reg, study_ids = df_adj_gender_of_rated_reg$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                                  prior_scale = "none", transformation = "none",
                                                  priors = list(
                                                    gender_of_rated = list(
                                                      null = prior_factor("spike", list(0), contrast = "meandif"),
                                                      alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                                    ),
                                                    prostitues = list(
                                                      null = prior_factor("spike", list(0), contrast = "treatment"),
                                                      alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                    ),
                                                    quasi_OR_control = list(
                                                      null = prior_factor("spike", list(0), contrast = "treatment"),
                                                      alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                    )
                                                  ),
                                                  adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_gender_of_rated, file = "fit_adj_robma.3lvl_gender_of_rated.RDS")
})

job::job({
  fit_adj_robma_facing_customer <- RoBMA.reg(~ facing_customer + prostitues + quasi_OR_control, data = df_adj_facing_customer_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                             prior_scale = "none", transformation = "none",
                                             priors = list(
                                               facing_customer = list(
                                                 null = prior_factor("spike", list(0), contrast = "meandif"),
                                                 alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                               ),
                                               prostitues = list(
                                                 null = prior_factor("spike", list(0), contrast = "treatment"),
                                                 alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                               ),
                                               quasi_OR_control = list(
                                                 null = prior_factor("spike", list(0), contrast = "treatment"),
                                                 alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                               )
                                             ),
                                             adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_facing_customer, file = "fit_adj_robma_facing_customer.RDS")
})
job::job({
  fit_adj_robma.3lvl_facing_customer <- RoBMA.reg(~ facing_customer + prostitues + quasi_OR_control, data = df_adj_facing_customer_reg, study_ids = df_adj_facing_customer_reg$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                                  prior_scale = "none", transformation = "none",
                                                  priors = list(
                                                    facing_customer = list(
                                                      null = prior_factor("spike", list(0), contrast = "meandif"),
                                                      alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                                    ),
                                                    prostitues = list(
                                                      null = prior_factor("spike", list(0), contrast = "treatment"),
                                                      alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                    ),
                                                    quasi_OR_control = list(
                                                      null = prior_factor("spike", list(0), contrast = "treatment"),
                                                      alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                    )
                                                  ),
                                                  adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_facing_customer, file = "fit_adj_robma.3lvl_facing_customer.RDS")
})

job::job({
  fit_adj_robma_output_measurability_dummy <- RoBMA.reg(~ output_measurability_dummy + prostitues + quasi_OR_control, data = df_adj_output_measurability_dummy_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                                        prior_scale = "none", transformation = "none",
                                                        priors = list(
                                                          output_measurability_dummy = list(
                                                            null = prior_factor("spike", list(0), contrast = "meandif"),
                                                            alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                                          ),
                                                          prostitues = list(
                                                            null = prior_factor("spike", list(0), contrast = "treatment"),
                                                            alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                          ),
                                                          quasi_OR_control = list(
                                                            null = prior_factor("spike", list(0), contrast = "treatment"),
                                                            alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                          )
                                                        ),
                                                        adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_output_measurability_dummy, file = "fit_adj_robma_output_measurability_dummy.RDS")
})
job::job({
  fit_adj_robma.3lvl_output_measurability_dummy <- RoBMA.reg(~ output_measurability_dummy + prostitues + quasi_OR_control, data = df_adj_output_measurability_dummy_reg, study_ids = df_adj_output_measurability_dummy_reg$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                                             prior_scale = "none", transformation = "none",
                                                             priors = list(
                                                               output_measurability_dummy = list(
                                                                 null = prior_factor("spike", list(0), contrast = "meandif"),
                                                                 alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                                               ),
                                                               prostitues = list(
                                                                 null = prior_factor("spike", list(0), contrast = "treatment"),
                                                                 alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                               ),
                                                               quasi_OR_control = list(
                                                                 null = prior_factor("spike", list(0), contrast = "treatment"),
                                                                 alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                               )
                                                             ),
                                                             adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_output_measurability_dummy, file = "fit_adj_robma.3lvl_output_measurability_dummy.RDS")
})

job::job({
  fit_adj_robma_interaction_intensity_dummy <- RoBMA.reg(~ interaction_intensity_dummy + prostitues + quasi_OR_control, data = df_adj_interaction_intensity_dummy_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                                         prior_scale = "none", transformation = "none",
                                                         priors = list(
                                                           interaction_intensity_dummy = list(
                                                             null = prior_factor("spike", list(0), contrast = "meandif"),
                                                             alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                                           ),
                                                           prostitues = list(
                                                             null = prior_factor("spike", list(0), contrast = "treatment"),
                                                             alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                           ),
                                                           quasi_OR_control = list(
                                                             null = prior_factor("spike", list(0), contrast = "treatment"),
                                                             alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                           )
                                                         ),
                                                         adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_interaction_intensity_dummy, file = "fit_adj_robma_interaction_intensity_dummy.RDS")
})
job::job({
  fit_adj_robma.3lvl_interaction_intensity_dummy <- RoBMA.reg(~ interaction_intensity_dummy + prostitues + quasi_OR_control, data = df_adj_interaction_intensity_dummy_reg, study_ids = df_adj_interaction_intensity_dummy_reg$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                                              prior_scale = "none", transformation = "none",
                                                              priors = list(
                                                                interaction_intensity_dummy = list(
                                                                  null = prior_factor("spike", list(0), contrast = "meandif"),
                                                                  alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                                                ),
                                                                prostitues = list(
                                                                  null = prior_factor("spike", list(0), contrast = "treatment"),
                                                                  alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                                ),
                                                                quasi_OR_control = list(
                                                                  null = prior_factor("spike", list(0), contrast = "treatment"),
                                                                  alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                                                )
                                                              ),
                                                              adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_interaction_intensity_dummy, file = "fit_adj_robma.3lvl_interaction_intensity_dummy.RDS")
})

job::job({
  fit_adj_robma_cognition <- RoBMA.reg(~ control + prostitues, data = df_adj_cognition_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                       prior_scale = "none", transformation = "none",
                                       priors = list(
                                         control = list(
                                           null = prior_factor("spike", list(0), contrast = "meandif"),
                                           alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                         ),
                                         prostitues = list(
                                           null = prior_factor("spike", list(0), contrast = "treatment"),
                                           alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                         )
                                       ),
                                       adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_cognition, file = "fit_adj_robma_cognition.RDS")
})
job::job({
  fit_adj_robma.3lvl_cognition <- RoBMA.reg(~ control + prostitues, data = df_adj_cognition_reg, study_ids = df_adj_cognition_reg$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                            prior_scale = "none", transformation = "none",
                                            priors = list(
                                              control = list(
                                                null = prior_factor("spike", list(0), contrast = "meandif"),
                                                alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                              ),
                                              prostitues = list(
                                                null = prior_factor("spike", list(0), contrast = "treatment"),
                                                alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                              )
                                            ),
                                            adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_cognition, file = "fit_adj_robma.3lvl_cognition.RDS")
})

job::job({
  fit_adj_robma_cognition2 <- RoBMA.reg(~ noncognitive_skill_control + prostitues, data = df_adj_cognition2_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                        prior_scale = "none", transformation = "none",
                                        priors = list(
                                          noncognitive_skill_control = list(
                                            null = prior_factor("spike", list(0), contrast = "meandif"),
                                            alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                          ),
                                          prostitues = list(
                                            null = prior_factor("spike", list(0), contrast = "treatment"),
                                            alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                          )
                                        ),
                                        adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_cognition2, file = "fit_adj_robma_cognition2.RDS")
})
job::job({
  fit_adj_robma.3lvl_cognition2 <- RoBMA.reg(~ noncognitive_skill_control + prostitues, data = df_adj_cognition2_reg, study_ids = df_adj_cognition2_reg$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                             prior_scale = "none", transformation = "none",
                                             priors = list(
                                               noncognitive_skill_control = list(
                                                 null = prior_factor("spike", list(0), contrast = "meandif"),
                                                 alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                               ),
                                               prostitues = list(
                                                 null = prior_factor("spike", list(0), contrast = "treatment"),
                                                 alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                               )
                                             ),
                                             adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_cognition2, file = "fit_adj_robma.3lvl_cognition2.RDS")
})

job::job({
  fit_adj_robma_cognition3 <- RoBMA.reg(~ prostitues, data = df_adj_cognition3, study_ids = df_adj_cognition3$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                        prior_scale = "none", transformation = "none",
                                        priors = list(
                                          prostitues = list(
                                            null = prior_factor("spike", list(0), contrast = "treatment"),
                                            alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                          )
                                        ),
                                        adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_cognition3, file = "fit_adj_robma_cognition3.RDS")
})
job::job({
  fit_adj_robma.3lvl_cognition3 <- RoBMA.reg(~ prostitues, data = df_adj_cognition3, study_ids = df_adj_cognition3$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                             prior_scale = "none", transformation = "none",
                                             priors = list(
                                               prostitues = list(
                                                 null = prior_factor("spike", list(0), contrast = "treatment"),
                                                 alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                               )
                                             ),
                                             adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_cognition3, file = "fit_adj_robma.3lvl_cognition3.RDS")
})

job::job({
  fit_adj_robma_salary <- RoBMA.reg(~ salary + prostitues + quasi_OR_control, data = df_adj_salary_reg, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                    prior_scale = "none", transformation = "none",
                                    priors = list(
                                      salary = list(
                                        null = prior_factor("spike", list(0), contrast = "meandif"),
                                        alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                      ),
                                      prostitues = list(
                                        null = prior_factor("spike", list(0), contrast = "treatment"),
                                        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                      ),
                                      quasi_OR_control = list(
                                        null = prior_factor("spike", list(0), contrast = "treatment"),
                                        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                      )
                                    ),
                                    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma_salary, file = "fit_adj_robma_salary.RDS")
})
job::job({
  fit_adj_robma.3lvl_salary <- RoBMA.reg(~ salary + prostitues + quasi_OR_control, data = df_adj_salary_reg, study_ids = df_adj_salary_reg$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
                                         prior_scale = "none", transformation = "none",
                                         priors = list(
                                           salary = list(
                                             null = prior_factor("spike", list(0), contrast = "meandif"),
                                             alt  = prior_factor("mnormal", list(0, prior_scale / 4), contrast = "meandif")
                                           ),
                                           prostitues = list(
                                             null = prior_factor("spike", list(0), contrast = "treatment"),
                                             alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                           ),
                                           quasi_OR_control = list(
                                             null = prior_factor("spike", list(0), contrast = "treatment"),
                                             alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
                                           )
                                         ),
                                         adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_robma.3lvl_salary, file = "fit_adj_robma.3lvl_salary.RDS")
})

# Add for plotting
job::job({
  fit_adj_nobma <- NoBMA.reg(
    ~ prostitues + quasi_OR_control, data = df_adj, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
    prior_scale = "none", transformation = "none",
    priors = list(
      prostitues = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      ),
      quasi_OR_control = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      )
    ),
    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_nobma, file = "fit_adj_nobma.RDS")
})
job::job({
  fit_adj_nobma.3lvl <- NoBMA.reg(
    ~ prostitues + quasi_OR_control, data = df_adj, study_ids = df_adj$study_id, algorithm = "ss", parallel = TRUE, rescale_priors = prior_scale,
    prior_scale = "none", transformation = "none",
    priors = list(
      prostitues = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      ),
      quasi_OR_control = list(
        null = prior_factor("spike", list(0), contrast = "treatment"),
        alt  = prior_factor("normal", list(0, prior_scale / 4), contrast = "treatment")
      )
    ),
    adapt = 10000, burnin = 10000, sample = 10000, chains = 10, seed = 1, autofit = FALSE)
  saveRDS(fit_adj_nobma.3lvl, file = "fit_adj_nobma.3lvl.RDS")
})


### 5.9 load results ----
## unadjusted
# full sample
fit_robma               <- readRDS(file = "fit_robma.RDS")
fit_robma.3lvl          <- readRDS(file = "fit_robma.3lvl.RDS")
fit_robma.3lvl_database <- readRDS(file = "fit_robma.3lvl_database.RDS") # 3lvl using database
fit_robma_noprost       <- readRDS(file = "fit_robma_noprost.RDS")       # no prostitutes
fit_robma.3lvl_noprost  <- readRDS(file = "fit_robma.3lvl_noprost.RDS")
fit_robma_scalability       <- readRDS(file = "fit_robma_scalability.RDS")       # scalability sd
fit_robma.3lvl_scalability  <- readRDS(file = "fit_robma.3lvl_scalability.RDS")

# subgroups
fit_robma_industry                         <-  readRDS(file = "fit_robma_industry.RDS")
fit_robma.3lvl_industry                    <-  readRDS(file = "fit_robma.3lvl_industry.RDS")
fit_robma_occupation                       <-  readRDS(file = "fit_robma_occupation.RDS")
fit_robma.3lvl_occupation                  <-  readRDS(file = "fit_robma.3lvl_occupation.RDS")
fit_robma_gender_of_rated                  <-  readRDS(file = "fit_robma_gender_of_rated.RDS")
fit_robma.3lvl_gender_of_rated             <-  readRDS(file = "fit_robma.3lvl_gender_of_rated.RDS")
fit_robma_facing_customer                  <-  readRDS(file = "fit_robma_facing_customer.RDS")
fit_robma.3lvl_facing_customer             <-  readRDS(file = "fit_robma.3lvl_facing_customer.RDS")
fit_robma_output_measurability_dummy       <-  readRDS(file = "fit_robma_output_measurability_dummy.RDS")
fit_robma.3lvl_output_measurability_dummy  <-  readRDS(file = "fit_robma.3lvl_output_measurability_dummy.RDS")
fit_robma_interaction_intensity_dummy      <-  readRDS(file = "fit_robma_interaction_intensity_dummy.RDS")
fit_robma.3lvl_interaction_intensity_dummy <-  readRDS(file = "fit_robma.3lvl_interaction_intensity_dummy.RDS")
fit_robma_cognition                        <-  readRDS(file = "fit_robma_cognition.RDS")
fit_robma.3lvl_cognition                   <-  readRDS(file = "fit_robma.3lvl_cognition.RDS")
fit_robma_cognition2                       <-  readRDS(file = "fit_robma_cognition2.RDS")
fit_robma.3lvl_cognition2                  <-  readRDS(file = "fit_robma.3lvl_cognition2.RDS")
fit_robma_cognition3                       <-  readRDS(file = "fit_robma_cognition3.RDS")
fit_robma.3lvl_cognition3                  <-  readRDS(file = "fit_robma.3lvl_cognition3.RDS")
fit_robma_salary                           <-  readRDS(file = "fit_robma_salary.RDS")
fit_robma.3lvl_salary                      <-  readRDS(file = "fit_robma.3lvl_salary.RDS")

## adjusted
fit_adj_robma               <- readRDS(file = "fit_adj_robma.RDS")
fit_adj_robma.3lvl          <- readRDS(file = "fit_adj_robma.3lvl.RDS")
fit_adj_robma.3lvl_database <- readRDS(file = "fit_adj_robma.3lvl_database.RDS") # 3lvl using database
fit_adj_robma_noprost       <- readRDS(file = "fit_adj_robma_noprost.RDS")       # no prostitutes
fit_adj_robma.3lvl_noprost  <- readRDS(file = "fit_adj_robma.3lvl_noprost.RDS")
fit_adj_robma_scalability       <- readRDS(file = "fit_adj_robma_scalability.RDS")       # scalability sd
fit_adj_robma.3lvl_scalability  <- readRDS(file = "fit_adj_robma.3lvl_scalability.RDS")

# subgroups
fit_adj_robma_industry                         <-  readRDS(file = "fit_adj_robma_industry.RDS")
fit_adj_robma.3lvl_industry                    <-  readRDS(file = "fit_adj_robma.3lvl_industry.RDS")
fit_adj_robma_occupation                       <-  readRDS(file = "fit_adj_robma_occupation.RDS")
fit_adj_robma.3lvl_occupation                  <-  readRDS(file = "fit_adj_robma.3lvl_occupation.RDS")
fit_adj_robma_gender_of_rated                  <-  readRDS(file = "fit_adj_robma_gender_of_rated.RDS")
fit_adj_robma.3lvl_gender_of_rated             <-  readRDS(file = "fit_adj_robma.3lvl_gender_of_rated.RDS")
fit_adj_robma_facing_customer                  <-  readRDS(file = "fit_adj_robma_facing_customer.RDS")
fit_adj_robma.3lvl_facing_customer             <-  readRDS(file = "fit_adj_robma.3lvl_facing_customer.RDS")
fit_adj_robma_output_measurability_dummy       <-  readRDS(file = "fit_adj_robma_output_measurability_dummy.RDS")
fit_adj_robma.3lvl_output_measurability_dummy  <-  readRDS(file = "fit_adj_robma.3lvl_output_measurability_dummy.RDS")
fit_adj_robma_interaction_intensity_dummy      <-  readRDS(file = "fit_adj_robma_interaction_intensity_dummy.RDS")
fit_adj_robma.3lvl_interaction_intensity_dummy <-  readRDS(file = "fit_adj_robma.3lvl_interaction_intensity_dummy.RDS")
fit_adj_robma_cognition                        <-  readRDS(file = "fit_adj_robma_cognition.RDS")
fit_adj_robma.3lvl_cognition                   <-  readRDS(file = "fit_adj_robma.3lvl_cognition.RDS")
fit_adj_robma_cognition2                       <-  readRDS(file = "fit_adj_robma_cognition2.RDS")
fit_adj_robma.3lvl_cognition2                  <-  readRDS(file = "fit_adj_robma.3lvl_cognition2.RDS")
fit_adj_robma_cognition3                       <-  readRDS(file = "fit_adj_robma_cognition3.RDS")
fit_adj_robma.3lvl_cognition3                  <-  readRDS(file = "fit_adj_robma.3lvl_cognition3.RDS")
fit_adj_robma_salary                           <-  readRDS(file = "fit_adj_robma_salary.RDS")
fit_adj_robma.3lvl_salary                      <-  readRDS(file = "fit_adj_robma.3lvl_salary.RDS")

### summary function ----
add_row <- function(name, fit) {
  print(name)
  if (!is.RoBMA.reg(fit)) {
    N_est   <- nrow(fit$data)
    N_clstr <- if(all(is.na(fit$data$study_ids))) NA else length(unique(fit$data$study_ids))
  } else {
    N_est   <- nrow(fit$data$outcome)
    N_clstr <- if(all(is.na(fit$data$outcome$study_ids))) NA else length(unique(fit$data$outcome$study_ids))
  }
  
  # pooled estimates
  summary_fit <- summary(fit)
  est  <- summary_fit$estimates["mu", "Mean"]
  lci  <- summary_fit$estimates["mu", "0.025"]
  uci  <- summary_fit$estimates["mu", "0.975"]
  tau_est  <- summary_fit$estimates["tau", "Mean"]
  tau_lci  <- summary_fit$estimates["tau", "0.025"]
  tau_uci  <- summary_fit$estimates["tau", "0.975"]
  BF10   <- summary_fit$components["Effect", "inclusion_BF"]
  BFhet  <- summary_fit$components["Heterogeneity", "inclusion_BF"]
  BFbias <- summary_fit$components["Bias", "inclusion_BF"]
  
  
  if (is.RoBMA.reg(fit)) {
    # add marginal summary
    margsummary_fit <- as.data.frame(marginal_summary(fit)$estimates)[-1,-2]
    # suppress control variables
    margsummary_fit <- margsummary_fit[!(grepl("prostitues", rownames(margsummary_fit)) | grepl("quasi_OR_control", rownames(margsummary_fit))), ]
    
    # get the correct N_est
    pred_name <- substr(rownames(margsummary_fit), 1, regexpr("[", rownames(margsummary_fit), fixed = TRUE)-1)
    if(length(unique(pred_name)) > 1)
      stop("multiple predictors")
    
    pred_name <- unique(pred_name)
    if(length(pred_name) == 0) {
      # no predictor of interest -- only adjustment
      BFmod <- NA
    }else{
      BFmod     <- summary_fit$components_predictors[pred_name ,"inclusion_BF"]
    }
  } else {
    BFmod    <- NA
  }
  
  out_main <- data.frame(
    name  = name,
    level = NA,
    N_est    = N_est,
    N_clstr  = N_clstr,
    est    = est,
    lci    = lci,
    uci    = uci,
    tau_est = tau_est,
    tau_lci = tau_lci,
    tau_uci = tau_uci,
    BF10   = BF10,
    BFhet  = BFhet,
    BFbias = BFbias,
    BFmod  = BFmod
  )
  
  if (!is.RoBMA.reg(fit) || length(pred_name) == 0)
    return(out_main)
  
  N_est <- table(fit$data$predictors[[pred_name]])
  if(!all(is.na(fit$data$outcome$study_ids))){
    N_clstr <- rep(NA, length(N_est))
    for(i in seq_along(names(N_est))){
      N_clstr[i] <- length(unique(fit$data$outcome$study_ids[fit$data$predictors[[pred_name]] == names(N_est)[i]]))
    }
  }else{
    N_clstr <- NA
  }
  
  
  out_marg <- data.frame(
    name    = NA,
    level   = rownames(margsummary_fit),
    N_est   = as.numeric(N_est),
    N_clstr = N_clstr,
    est     = margsummary_fit[,1],
    lci     = margsummary_fit[,2],
    uci     = margsummary_fit[,3],
    tau_est = NA,
    tau_lci = NA,
    tau_uci = NA,
    BF10    = margsummary_fit[,4],
    BFhet   = NA,
    BFbias  = NA,
    BFmod   = NA
  )
  
  return(rbind(out_main, out_marg))
}

### 5.10 summarize results ----
out1 <- rbind(
  add_row("RoBMA",                         fit_robma),
  add_row("RoBMA (3lvl)",                  fit_robma.3lvl),
  add_row("RoBMA (3lvl, database)",        fit_robma.3lvl_database),
  add_row("RoBMA (no prostitues)",         fit_robma_noprost),
  add_row("RoBMA (3lvl, no prostitues)",   fit_robma.3lvl_noprost),
  add_row("RoBMA (scalability sd)",         fit_robma_scalability),
  add_row("RoBMA (3lvl, scalability sd)",   fit_robma.3lvl_scalability)
)

write.csv(out1, "main results - unadjusted.csv", row.names = FALSE)

out2 <- rbind(
  add_row("Industry",                           fit_robma_industry),
  add_row("Industry (3lvl)",                    fit_robma.3lvl_industry),
  add_row("Occupation",                         fit_robma_occupation),
  add_row("Occupation (3lvl)",                  fit_robma.3lvl_occupation),
  add_row("Gender of rated",                    fit_robma_gender_of_rated),
  add_row("Gender of rated (3lvl)",             fit_robma.3lvl_gender_of_rated),
  add_row("Facing customer",                    fit_robma_facing_customer),
  add_row("Facing customer (3lvl)",             fit_robma.3lvl_facing_customer),
  add_row("Output measurability dummy",         fit_robma_output_measurability_dummy),
  add_row("Output measurability dummy (3lvl)",  fit_robma.3lvl_output_measurability_dummy),
  add_row("Interaction intensity dummy",        fit_robma_interaction_intensity_dummy),
  add_row("Interaction intensity dummy (3lvl)", fit_robma.3lvl_interaction_intensity_dummy),
  add_row("Cognition-1",                        fit_robma_cognition),
  add_row("Cognition-1 (3lvl)",                 fit_robma.3lvl_cognition),
  add_row("Cognition-2",                        fit_robma_cognition2),
  add_row("Cognition-2 (3lvl)",                 fit_robma.3lvl_cognition2),
  add_row("Cognition-3",                        fit_robma_cognition3),
  add_row("Cognition-3 (3lvl)",                 fit_robma.3lvl_cognition3),
  add_row("Salary",                             fit_robma_salary),
  add_row("Salary (3lvl)",                      fit_robma.3lvl_salary)
)

write.csv(out2, "subgroup results - unadjusted.csv", row.names = FALSE)

out3 <- rbind(
  add_row("RoBMA",                         fit_adj_robma),
  add_row("RoBMA (3lvl)",                  fit_adj_robma.3lvl),
  add_row("RoBMA (3lvl, database)",        fit_adj_robma.3lvl_database),
  add_row("RoBMA (no prostitues)",         fit_adj_robma_noprost),
  add_row("RoBMA (3lvl, no prostitues)",   fit_adj_robma.3lvl_noprost),
  add_row("RoBMA (scalability sd)",         fit_adj_robma_scalability),
  add_row("RoBMA (3lvl, scalability sd)",   fit_adj_robma.3lvl_scalability)
)

write.csv(out3, "main results - adjusted.csv", row.names = FALSE)

out4 <- rbind(
  add_row("Industry",                           fit_adj_robma_industry),
  add_row("Industry (3lvl)",                    fit_adj_robma.3lvl_industry),
  add_row("Occupation",                         fit_adj_robma_occupation),
  add_row("Occupation (3lvl)",                  fit_adj_robma.3lvl_occupation),
  add_row("Gender of rated",                    fit_adj_robma_gender_of_rated),
  add_row("Gender of rated (3lvl)",             fit_adj_robma.3lvl_gender_of_rated),
  add_row("Facing customer",                    fit_adj_robma_facing_customer),
  add_row("Facing customer (3lvl)",             fit_adj_robma.3lvl_facing_customer),
  add_row("Output measurability dummy",         fit_adj_robma_output_measurability_dummy),
  add_row("Output measurability dummy (3lvl)",  fit_adj_robma.3lvl_output_measurability_dummy),
  add_row("Interaction intensity dummy",        fit_adj_robma_interaction_intensity_dummy),
  add_row("Interaction intensity dummy (3lvl)", fit_adj_robma.3lvl_interaction_intensity_dummy),
  add_row("Cognition-1",                        fit_adj_robma_cognition),
  add_row("Cognition-1 (3lvl)",                 fit_adj_robma.3lvl_cognition),
  add_row("Cognition-2",                        fit_adj_robma_cognition2),
  add_row("Cognition-2 (3lvl)",                 fit_adj_robma.3lvl_cognition2),
  add_row("Cognition-3",                        fit_adj_robma_cognition3),
  add_row("Cognition-3 (3lvl)",                 fit_adj_robma.3lvl_cognition3),
  add_row("Salary",                             fit_adj_robma_salary),
  add_row("Salary (3lvl)",                      fit_adj_robma.3lvl_salary)
)

write.csv(out4, "subgroup results - adjusted.csv", row.names = FALSE)

### add zcurve plot ----
zfit_nobma.3lvl <- as_zcurve(fit_nobma.3lvl)
zfit_robma.3lvl <- as_zcurve(fit_robma.3lvl)

pdf("zcurve-plot.pdf", width = 7.5, height = 5)
par(mar = c(4, 4, 1, 1))
hist(zfit_robma.3lvl, from = -5, to = 10, ylim = c(0, 0.3))
lines(zfit_nobma.3lvl, extrapolate = FALSE, from = -5, to = 10, lwd = 2)
lines(zfit_robma.3lvl, extrapolate = FALSE, col = "blue", from = -5, to = 10, lwd = 2)
legend("topright", c("Random-Effects", "RoBMA"), lwd = 2, col = c("black", "blue"), bty = "n")
dev.off()


zfit_nobma_reg.3lvl <- as_zcurve(fit_adj_nobma.3lvl)
zfit_robma_reg.3lvl <- as_zcurve(fit_adj_robma.3lvl)

# essentially identical results to the unadjusted model, keep the simpler visualization
pdf("zcurve-adj-plot.pdf", width = 7.5, height = 5)
par(mar = c(4, 4, 1, 1))
hist(zfit_robma_reg.3lvl, from = -5, to = 10, ylim = c(0, 0.3))
lines(zfit_nobma_reg.3lvl, extrapolate = FALSE, from = -5, to = 10, lwd = 2)
lines(zfit_robma_reg.3lvl, extrapolate = FALSE, col = "blue", from = -5, to = 10, lwd = 2)
legend("topright", c("Random-Effects", "RoBMA"), lwd = 2, col = c("black", "blue"), bty = "n")
dev.off()

### session info
sessionInfo()

############################################################################################
# SECTION 6: Bayesian Model Averaging (BMS)
############################################################################################

# IMPORTANT:
#  - This section assumes that 'beauty_R.xlsx' was exported from Stata with variables
#    in the order given by the macro `bma_vars` in beauty.do.
#  - We overwrite column names with readable labels and then select the subset of
#    variables used in BMA.
#  - The first column ("Premium") is treated as the dependent variable in 'bms()'.

# ---- 6.1 Relabel columns to readable names ----
colnames(databeauty) <- c(
  "Study ID", "Study",
  "Premium", "Standard error",
  "Interviewer-rated beauty", "Photo-rated beauty", "Software-rated beauty",
  "Dummy beauty", "Beauty penalty", "Number of raters",
  "Salary", "Study outcomes", "Teaching & research outcomes",
  "Athletic success", "Electoral success",
  "Male subjects", "Female subjects", "Subjects' age",
  "High-skilled workers", "Prostitutes",
  "Interpersonal intensity", "Output measurability", "Appearance spendings",
  "Western culture", "Panel data", "Data year",
  "OLS method", "IV method", "DID method",
  "Age control", "Education control", "Ethnicity control",
  "Cognitive-skill control", "Non-cognitive-skill control", "Physicality control",
  "Published study", "Impact factor", "Citations",
  "Cognitive measured", "Number of observations",
  "Premium STD", "Standard error STD"
)

# ---- 6.2 Select variables for BMA ----
keep <- c(
  "Premium", "Standard error",
  "Interviewer-rated beauty", "Photo-rated beauty", "Software-rated beauty",
  "Dummy beauty", "Beauty penalty", "Number of raters",
  "Salary", "Study outcomes", "Teaching & research outcomes",
  "Athletic success", "Electoral success",
  "Male subjects", "Female subjects", "Subjects' age",
  "High-skilled workers", "Prostitutes",
  "Interpersonal intensity", "Output measurability", "Appearance spendings",
  "Western culture", "Panel data", "Data year",
  "OLS method", "IV method", "DID method",
  "Age control", "Education control", "Ethnicity control",
  "Cognitive-skill control", "Non-cognitive-skill control", "Physicality control",
  "Published study", "Impact factor", "Citations"
)

df_beauty <- databeauty[keep]

# BMS requires numeric data and no missing values
df_beauty <- as.data.frame(df_beauty)
df_beauty[] <- lapply(df_beauty, function(x) {
  if (is.logical(x)) as.numeric(x) else x
})
df_beauty <- na.omit(df_beauty)

cat("\n============================================================\n")
cat("BMA data summary (after NA removal):\n")
cat("Rows:", nrow(df_beauty), "  Columns:", ncol(df_beauty), "\n")

# ---- 6.3 BMS Model 1: UIP + dilution prior (George et al.) ----
# BMA employs the unit-information prior for regression coefficients
# and the dilution prior proposed by George et al.

beauty1 <- bms(
  df_beauty,
  burn     = 1e5,
  iter     = 3e5,
  g        = "UIP",
  mprior   = "dilut",
  nmodel   = 50000,
  mcmc     = "bd",
  user.int = FALSE
)

coef(beauty1, order.by.pip = FALSE, exact = TRUE, include.constant = TRUE)
image(beauty1,
      yprop2pip    = FALSE,
      order.by.pip = TRUE,
      do.par       = TRUE,
      do.grid      = TRUE,
      do.axis      = TRUE,
      cex.axis     = 0.7)
summary(beauty1)
plot(beauty1)
print(beauty1$topmod[1])

# ---- 6.4 BMS Model 2: BRIC g-prior + random model prior ----
beauty2 <- bms(
  df_beauty,
  burn     = 1e5,
  iter     = 3e5,
  g        = "BRIC",
  mprior   = "random",
  nmodel   = 50000,
  mcmc     = "bd",
  user.int = FALSE
)

coef(beauty2, order.by.pip = FALSE, exact = TRUE, include.constant = TRUE)
image(beauty2,
      yprop2pip    = FALSE,
      order.by.pip = TRUE,
      do.par       = TRUE,
      do.grid      = TRUE,
      do.axis      = TRUE,
      cex.axis     = 0.7)
summary(beauty2)
plot(beauty2)
print(beauty2$topmod[1])

# ---- 6.5 BMS Model 3: Excluding beauty-penalty observations ----
# Here, the underlying dataset is modified: rows with Beauty penalty == 1 are removed.

df_no_penalties <- df_beauty[df_beauty$`Beauty penalty` == 0, ]

cat("\n============================================================\n")
cat("BMA (beauty3): Dataset excluding beauty-penalty observations\n")
cat("Rows:", nrow(df_no_penalties), "  Columns:", ncol(df_no_penalties), "\n")

beauty3 <- bms(
  df_no_penalties,
  burn     = 1e5,
  iter     = 3e5,
  g        = "UIP",
  mprior   = "dilut",
  nmodel   = 50000,
  mcmc     = "bd",
  user.int = FALSE
)

coef(beauty3, order.by.pip = FALSE, exact = TRUE, include.constant = TRUE)
image(beauty3,
      yprop2pip    = FALSE,
      order.by.pip = TRUE,
      do.par       = TRUE,
      do.grid      = TRUE,
      do.axis      = TRUE,
      cex.axis     = 0.7)
summary(beauty3)
plot(beauty3)
print(beauty3$topmod[1])


############################################################################################
# SECTION 7: Best-practice (BP) predictions from benchmark model
############################################################################################

# 7.1 Extract posterior means from beauty1
coef_mat <- coef(beauty1,
                 order.by.pip     = FALSE,
                 exact            = TRUE,
                 include.constant = TRUE)

# Identify the column with posterior means (usually "post mean")
post_mean_col <- grep("post mean", colnames(coef_mat),
                      ignore.case = TRUE, value = TRUE)
if (length(post_mean_col) != 1) {
  stop("Could not uniquely identify the 'post mean' column in coef(beauty1).")
}

beta_hat <- coef_mat[, post_mean_col]
beta_hat <- as.numeric(beta_hat)
names(beta_hat) <- rownames(coef_mat)

# 7.2 Define the baseline Best-Practice profile (BP)
bp <- c(
  "(Intercept)"                  = 1,
  "Standard error"               = 0,
  "Interviewer-rated beauty"     = 0,
  "Photo-rated beauty"           = 0,
  "Software-rated beauty"        = 1,
  "Dummy beauty"                 = 0,
  "Beauty penalty"               = 0,
  "Number of raters"             = 4.44,
  "Salary"                       = 0.59,
  "Study outcomes"               = 0.16,
  "Teaching & research outcomes" = 0.12,
  "Athletic success"             = 0.03,
  "Electoral success"            = 0.03,
  "Male subjects"                = 0.32,
  "Female subjects"              = 0.39,
  "Subjects' age"                = 3.40,
  "High-skilled workers"         = 0.29,
  "Prostitutes"                  = 0,
  "Interpersonal intensity"      = 0.62,
  "Output measurability"         = 1,
  "Appearance spendings"         = 7.23,
  "Western culture"              = 0.75,
  "Panel data"                   = 1,
  "Data year"                    = 3.84,
  "OLS method"                   = 0,
  "IV method"                    = 0,
  "DID method"                   = 1,
  "Age control"                  = 1,
  "Education control"            = 1,
  "Ethnicity control"            = 1,
  "Cognitive-skill control"      = 1,
  "Non-cognitive-skill control"  = 1,
  "Physicality control"          = 1,
  "Published study"              = 1,
  "Impact factor"                = 8.26,
  "Citations"                    = 4.06
)

# Optional: check that BP names match coefficient names
common_names <- intersect(names(beta_hat), names(bp))
if (length(common_names) < length(bp)) {
  warning("Some BP entries do not match coefficient names from beauty1.")
}

# Helper: linear prediction for a given profile
bp_predict <- function(beta, profile) {
  cn <- intersect(names(beta), names(profile))
  sum(beta[cn] * profile[cn])
}

# 7.3 Baseline BP prediction (overall estimate)
bp_baseline <- bp_predict(beta_hat, bp)
cat("\nBP baseline prediction (beauty1):", bp_baseline, "\n")

# 7.4 Construct alternative BP profiles for subsamples

# Athletes
bp_athletes <- bp
bp_athletes["Athletic success"]             <- 1
bp_athletes["Salary"]                       <- 0
bp_athletes["Study outcomes"]               <- 0
bp_athletes["Teaching & research outcomes"] <- 0
bp_athletes["Electoral success"]            <- 0

# Politicians
bp_politicians <- bp
bp_politicians["Electoral success"]            <- 1
bp_politicians["Salary"]                       <- 0
bp_politicians["Study outcomes"]               <- 0
bp_politicians["Teaching & research outcomes"] <- 0
bp_politicians["Athletic success"]             <- 0

# Prostitutes
bp_prostitutes <- bp
bp_prostitutes["Prostitutes"]                  <- 1
bp_prostitutes["Salary"]                       <- 1
bp_prostitutes["Study outcomes"]               <- 0
bp_prostitutes["Teaching & research outcomes"] <- 0
bp_prostitutes["Athletic success"]             <- 0
bp_prostitutes["Electoral success"]            <- 0

# Students
bp_students <- bp
bp_students["Study outcomes"]               <- 1
bp_students["Salary"]                       <- 0
bp_students["Teaching & research outcomes"] <- 0
bp_students["Athletic success"]             <- 0
bp_students["Electoral success"]            <- 0

# Teachers & Scientists
bp_teachers_scientists <- bp
bp_teachers_scientists["Teaching & research outcomes"] <- 1
bp_teachers_scientists["Salary"]                       <- 0
bp_teachers_scientists["Study outcomes"]               <- 0
bp_teachers_scientists["Athletic success"]             <- 0
bp_teachers_scientists["Electoral success"]            <- 0

# Male subjects
bp_male <- bp
bp_male["Male subjects"]   <- 1
bp_male["Female subjects"] <- 0

# Female subjects
bp_female <- bp
bp_female["Male subjects"]   <- 0
bp_female["Female subjects"] <- 1

# Earnings
bp_earnings <- bp
bp_earnings["Salary"]                       <- 1
bp_earnings["Study outcomes"]               <- 0
bp_earnings["Teaching & research outcomes"] <- 0
bp_earnings["Athletic success"]             <- 0
bp_earnings["Electoral success"]            <- 0

# Low / Mid / High interpersonal intensity
bp_low_interpersonal  <- bp; bp_low_interpersonal["Interpersonal intensity"]  <- 0.22
bp_mid_interpersonal  <- bp; bp_mid_interpersonal["Interpersonal intensity"]  <- 0.55
bp_high_interpersonal <- bp; bp_high_interpersonal["Interpersonal intensity"] <- 0.84

# Low / Mid / High output measurability
bp_low_output  <- bp; bp_low_output["Output measurability"]  <- 0.37
bp_mid_output  <- bp; bp_mid_output["Output measurability"]  <- 0.60
bp_high_output <- bp; bp_high_output["Output measurability"] <- 0.78

# 7.5 Compute predictions for all scenarios
bp_results <- data.frame(
  Scenario = c(
    "Baseline BP",
    "Athletes",
    "Politicians",
    "Prostitutes",
    "Students",
    "Teachers & Scientists",
    "Male subjects",
    "Female subjects",
    "Earnings",
    "Low interpersonal intensity",
    "Mid interpersonal intensity",
    "High interpersonal intensity",
    "Low output measurability",
    "Mid output measurability",
    "High output measurability"
  ),
  Estimate = c(
    bp_predict(beta_hat, bp),
    bp_predict(beta_hat, bp_athletes),
    bp_predict(beta_hat, bp_politicians),
    bp_predict(beta_hat, bp_prostitutes),
    bp_predict(beta_hat, bp_students),
    bp_predict(beta_hat, bp_teachers_scientists),
    bp_predict(beta_hat, bp_male),
    bp_predict(beta_hat, bp_female),
    bp_predict(beta_hat, bp_earnings),
    bp_predict(beta_hat, bp_low_interpersonal),
    bp_predict(beta_hat, bp_mid_interpersonal),
    bp_predict(beta_hat, bp_high_interpersonal),
    bp_predict(beta_hat, bp_low_output),
    bp_predict(beta_hat, bp_mid_output),
    bp_predict(beta_hat, bp_high_output)
  )
)

print(bp_results)

############################################################################################
# SECTION 8: Occupation-weighted representative estimate (corrected pooled CI)
############################################################################################
#   Compute a representative, occupation-weighted estimate of the beauty premium using
#   mean-corrected split-normal pooling of study-level (median-aggregated) estimates.

# ---- 8.1 Corrected pooled CI function (from CrI_repres) ----
get_corrected_pooled_ci <- function(means, lowers, uppers, weights, n_sim = 1e6) {
  
  weights <- weights / sum(weights)
  simulated_pop <- numeric(n_sim)
  counts <- as.vector(multinom(1, n_sim, weights))
  current_idx <- 1
  
  for (i in seq_along(means)) {
    n_k <- counts[i]
    if (n_k == 0) next
    
    mu  <- means[i]
    L   <- lowers[i]
    U   <- uppers[i]
    
    sigma_left  <- (mu - L) / 1.96
    sigma_right <- (U - mu) / 1.96
    
    drift <- sqrt(2/pi) * (sigma_right - sigma_left)
    adjusted_mode <- mu - drift
    
    raw <- rnorm(n_k)
    samples <- adjusted_mode +
      ifelse(raw < 0, raw * sigma_left, raw * sigma_right)
    
    simulated_pop[current_idx:(current_idx + n_k - 1)] <- samples
    current_idx <- current_idx + n_k
  }
  
  list(
    mean = mean(simulated_pop),
    ci   = quantile(simulated_pop, c(0.025, 0.975))
  )
}


# ---- 8.2 Helper: run representative estimate ----
run_representative_estimate <- function(data, y_var, se_var, weight_var) {
  
  cat("\n============================================================\n")
  cat("Occupation-weighted representative estimate (corrected pooled CI)\n")
  cat("============================================================\n")
  
  # Drop missing
  ok <- !is.na(data[[y_var]]) & !is.na(data[[se_var]]) & !is.na(data[[weight_var]])
  d  <- data[ok, ]
  
  cat("Observations before median-aggregation:", nrow(d), "\n")
  
  if (nrow(d) == 0) {
    cat("No usable observations.\n")
    return(invisible(NULL))
  }
  
  # Study-level representative values (consistent with STEM)
  med <- data_median(d, "study_id", y_var, se_var)
  cat("Observations after median-aggregation:", nrow(med), "\n")
  
  # Match occupation weights to study-level entries
  w <- d[[weight_var]][match(med$ID, d$study_id)]
  
  # CI bounds
  lower <- med$coefficient - 1.96 * med$standard_error
  upper <- med$coefficient + 1.96 * med$standard_error
  
  # Run corrected pooled CI
  set.seed(42)
  out <- get_corrected_pooled_ci(
    means   = med$coefficient,
    lowers  = lower,
    uppers  = upper,
    weights = w,
    n_sim   = 1e6
  )
  
  cat("Corrected pooled mean:     ", round(out$mean, 3), "\n")
  cat("95% credible interval:     ",
      round(out$ci[1], 3), "to", round(out$ci[2], 3), "\n")
  cat("Weighting:                 Employment shares (occupation-weighted)\n")
  cat("============================================================\n\n")
  
  invisible(out)
}


# ---- 8.3 Run the representative estimate ----
repres_full <- run_representative_estimate(
  data       = databeauty,
  y_var      = "premium_w",
  se_var     = "se_premium_w",
  weight_var = "occupation_weight_share"   # adjust if your variable is named differently
)

############################################################################################
# END OF SCRIPT
############################################################################################
