/******************************************************************************************
  REPLICATION SCRIPT – STATA
  Paper:   "Meta-Analysis of Field Studies on Beauty and Professional Success"
  Authors: Z. Irsova, T. Havranek, K. Bortnikova, F. Bartoš

  Purpose:
    Replicate the main Stata-based analyses reported in the paper:

      (1) Data import, construction of convenience variables, and winsorization
      (2) Descriptive statistics, correlations, and multicollinearity diagnostics
      (3) Descriptive graphs (histograms, time trends, box plots by study and country)
      (4) Publication-bias diagnostics:
            - funnel plots
            - FAT–PET and PET–PEESE regressions
            - WAAP (Ioannidis et al., 2017)
            - endogenous-kink model (Bom & Rachinger, 2020)
            - caliper tests (Gerber & Malhotra, 2008)
      (5) Heterogeneity analysis and “best-practice” frequentist fitted effects
      (6) Export of analysis dataset for BMA and other R-based procedures

  Data requirements:
    - beauty.xlsx (sheet "data") in the working directory
      created from the master Stata dataset as described in the main replication files.

  Software requirements:
    - Stata 15 or later

  User-written Stata packages (install once):
    - winsor2   (ssc install winsor2)
    - metan     (ssc install metan)
    - ivreg2    (ssc install ivreg2)
    - weakiv    (ssc install weakiv)
    - reghdfe   (ssc install reghdfe)
    - collin    (ssc install collin)
    - estout    (ssc install estout)    // provides eststo, esttab
    - univar    (ssc install univar)    // univariate summaries
    - boottest  (net install boottest, ///
                 from("https://raw.githubusercontent.com/droodman/boottest/main/"))

  Output:
    - beauty.log                     – Stata log of all commands and results
    - histogram.gph, trend.gph,
      studies.gph, countries.gph,
      funnel_all.gph, funnel.gph,
      caliper.gph                    – graphs used in the paper
    - table_bias_*.tex               – LaTeX tables with FAT–PET estimates
    - beauty_R.xlsx                  – dataset exported for R-based methods (RTMA, STEM, AK, BMA)

  Notes:
    - The script assumes a clean Stata session and overwrites any existing
      log, graph, and table files with the same names in the working directory.
******************************************************************************************/

* --- Session set-up -----------------------------------------------------------------------
* Run everything below under Stata 15 semantics.
version 15
* Close a log left open by an earlier run; -capture- swallows the error when none is open.
capture log close
clear all
set more off

* The wild-bootstrap tests (-boottest-) used in the FAT-PET section draw random numbers.
* Fix the generator state so the log is identical on every run, not only in a fresh session.
* 123456789 is the seed Stata is documented to start each session with, so results from a
* clean session should be unchanged (NOTE(review): confirm against -help set seed-).
set seed 123456789

* Text log of all commands and results; overwrites a previous beauty.log.
log using beauty.log, replace

******************************************************************************************
* 1. DATA IMPORT & BASIC PREP
******************************************************************************************/

* Read sheet "data" of beauty.xlsx; the first spreadsheet row supplies the variable names.
import excel "beauty.xlsx", firstrow sheet("data")
* Declare study_id as the panel identifier (needed by the -xtreg- calls further down).
xtset study_id

* Winsorize effects and their standard errors at the 1st and 99th percentiles.
* Each variable is processed on its own; the result is stored as <name>_w.
foreach raw of varlist premium se_premium premium_sd se_premium_sd {
    winsor2 `raw', cuts(1 99) suffix(_w)
}

* Derived variables (creation order kept: it determines the column order of the dataset)
generate se_premium_w2          = se_premium_w^2        // squared SE (PEESE regressor, WLS weights)
generate precision_premium      = 1 / se_premium        // precision of the raw estimate
generate precision_premium_w    = 1 / se_premium_w      // precision of the winsorized estimate
generate precision_premium_sd_w = 1 / se_premium_sd_w   // precision of the standardized estimate
* Sign-flipped effect and its precision, defined only for beauty-penalty estimates
generate penalty           = -premium_w       if beauty_penalty == 1
generate precision_penalty = 1 / se_premium_w if beauty_penalty == 1
* Number of estimates per study and the matching inverse weight
by study_id, sort: generate n_obs = _N
generate weight = 1 / n_obs
* Log transformations
generate number_of_raters = ln(no_of_raters)
generate age_subject      = ln(average_age)
* Data year measured from the earliest year in the sample (earliest year -> ln(1) = 0)
summarize datyear, meanonly
local first_year = r(min)
generate data_year = ln(datyear - `first_year' + 1)
* Publication year measured from 1994; years before 1994 are coded 0.
* A missing pubyear satisfies ">= 1994" in Stata and therefore yields a missing value.
generate publication_year = cond(pubyear >= 1994, ln(pubyear - 1994 + 1), 0)
* Log of (citations per year since first appearance, plus one)
generate citations = ln(tot_citations/(citcollec_year-appear_year+1)+1)

univar premium_w se_premium_w penalty

******************************************************************************************
* 2. SUMMARY STATISTICS
******************************************************************************************/

* Means of the winsorized effect for the full sample and for each subsample.
* Pass 1 is unweighted; pass 2 weights each estimate by weight = 1/n_obs (the inverse of
* the number of estimates reported by its study). The subsamples are visited in the same
* order in both passes.
foreach scheme in unweighted weighted {

    local wt
    if "`scheme'" == "weighted" {
        local wt "[aweight=weight]"
    }

    // Full sample
    mean premium_w `wt'

    // Beauty measurement, outcome type, gender, occupation group, culture
    foreach cond in ///
        interviewer_rated_beauty==1 photo_rated_beauty==1 software_rated_beauty==1 ///
        self_rated_beauty==1 dummy_beauty==1 categorical_beauty==1 ///
        beauty_penalty==0 beauty_penalty==1 ///
        salary==1 salary==0 ///
        study_outcomes==1 teaching_research_outcomes==1 athletic_success==1 ///
        electoral_success==1 other_outcomes==1 ///
        male_subjects==1 female_subjects==1 mix_gender_subjects==1 ///
        high_skilled_workers==1 prostitutes==1 other_dressy_occupations==1 ///
        non_dressy_occupation==1 ///
        western_culture==1 other_cultures==1 {
        mean premium_w `wt' if `cond'
    }

    // Industry (string variable)
    foreach ind in "customer services" "financial services" "legal services" ///
        "political office" "professional sports" "scientific research" "sex industry" {
        mean premium_w `wt' if industry=="`ind'"
    }

    // Occupation (string variable)
    foreach occ in "general" "athletes" "executives" "lawyers" "politicians" ///
        "sex workers" "salesman" "scientists" "students" "teachers" {
        mean premium_w `wt' if occupation=="`occ'"
    }

    // Customer contact: 0, 0.5, 1
    foreach lvl in 0 0.5 1 {
        mean premium_w `wt' if facing_customer==`lvl'
    }

    // Three-level groupings of interaction intensity and output measurability
    foreach grp in interaction_intensity_dummy output_measurability_dummy {
        forvalues tier = 1/3 {
            mean premium_w `wt' if `grp'==`tier'
        }
    }

    // Cognitive x non-cognitive skill controls: (0,0) (1,0) (0,1) (1,1)
    forvalues noncog = 0/1 {
        forvalues cog = 0/1 {
            mean premium_w `wt' if cognitive_skill_control==`cog' & noncognitive_skill_control==`noncog'
        }
    }

    // cognitive_measured==1, split by the non-cognitive control
    forvalues noncog = 0/1 {
        mean premium_w `wt' if cognitive_measured==1 & noncognitive_skill_control==`noncog'
    }
    mean premium_w `wt' if cognitive_measured==1 | quasi_experimental_method==1

    // Data structure, estimation method, controls, quality, publication status
    foreach cond in ///
        panel_data==1 cross_section==1 ///
        ols_method==1 iv_method==1 quasi_experimental_method==1 other_method==1 ///
        cognitive_skill_control==1 cognitive_skill_control==0 ///
        high_quality==1 high_quality==0 ///
        published_study==1 published_study==0 {
        mean premium_w `wt' if `cond'
    }
}

* Summarize each underlying index within the three tiers of its companion
* <index>_dummy grouping variable (interaction intensity first, then output measurability).
foreach idx in interaction_intensity output_measurability {
    forvalues tier = 1/3 {
        summarize `idx' if `idx'_dummy==`tier'
    }
}

* Descriptive statistics of the effect, its standard error, and the moderator variables:
* first unweighted, then weighted by weight = 1/n_obs.
local descvars ///
    premium_w se_premium_w interviewer_rated_beauty photo_rated_beauty ///
    software_rated_beauty self_rated_beauty dummy_beauty categorical_beauty ///
    beauty_penalty number_of_raters salary study_outcomes teaching_research_outcomes ///
    athletic_success electoral_success other_outcomes male_subjects female_subjects ///
    mix_gender_subjects age_subject high_skilled_workers prostitutes ///
    interaction_intensity output_measurability appearance_spending western_culture ///
    other_cultures panel_data cross_section data_year ols_method iv_method ///
    quasi_experimental_method other_method ageexp_control education_control ///
    ethnicity_control cognitive_skill_control noncognitive_skill_control ///
    physicality_control publication_year published_study impact_factor ///
    high_quality citations

summarize `descvars'

summarize `descvars' [aweight = weight]

* Pairwise correlations and collinearity diagnostics (user-written -collin-) for the
* variables entering the heterogeneity analysis. Both commands share the list up to
* physicality_control; publication_year is passed to -correlate- only.
local corevars ///
    premium_w se_premium_w interviewer_rated_beauty photo_rated_beauty ///
    software_rated_beauty dummy_beauty beauty_penalty number_of_raters salary ///
    study_outcomes teaching_research_outcomes athletic_success electoral_success ///
    male_subjects female_subjects age_subject high_skilled_workers prostitutes ///
    other_dressy_occupations western_culture panel_data data_year ///
    ols_method iv_method quasi_experimental_method ageexp_control education_control ///
    ethnicity_control cognitive_skill_control noncognitive_skill_control ///
    physicality_control

correlate `corevars' publication_year published_study impact_factor high_quality citations

collin `corevars' published_study impact_factor high_quality citations

******************************************************************************************
* 3. DESCRIPTIVE GRAPHS
******************************************************************************************/

* Frequency histogram of the raw (unwinsorized) estimates, restricted to the open
* interval (-20, 40). Dashed grey reference line at 0, solid red reference line at 4.3.
* Saved as histogram.gph.
histogram premium if premium < 40 & premium > -20, ///
    frequency bin(90) ///
    fcolor(gs14) lstyle(thin) ///
    xline(0, lcolor(gs12) lpattern(shortdash)) ///
    xline(4.3, lcolor(red)) ///
    xlabel(-20 0 4.3 20 40) ///
    xtitle("Estimate of the beauty effect") ///
    ylabel(, glcolor(ltbluishgray)) ///
    graphregion(color(ltbluishgray)) ///
    saving(histogram, replace)

* Time trend: study-level median estimate plotted against the study-level median data year.
* Every observation of a study carries the same pair of medians, so studies with several
* estimates are drawn as coincident markers.
bysort study_id: egen premium_med = median(premium)
bysort study_id: egen midyear_med = median(datyear)
* Reference lines: dotted red at 4.3 and dashed grey at 0.
* Fix: the pattern was spelled "dott", which is not a linepatternstyle; the valid name for
* a dotted line is "dot".
graph twoway ///
    (scatter premium_med midyear_med, msymbol(Oh)), ///
    yline(4.3, lpattern(dot) lcolor(red)) ///
    yline(0,   lpattern(shortdash) lcolor(gs12)) ///
    ylabel(-10 0 4.3 10 20 30, glcolor(ltbluishgray)) ///
    graphregion(color(ltbluishgray)) ///
    xtitle("Median year of data") ///
    ytitle("Median estimate of the beauty effect (%)") ///
    legend(off) ///
    saving(trend, replace)

* Horizontal box plot of the raw estimates within (-20, 40), one box per study,
* with studies ordered by datyear. Saved as studies.gph.
graph hbox premium if premium < 40 & premium > -20, ///
    over(study, sort(datyear) label(grid)) ///
    box(1, lcolor(black) fcolor(none)) ///
    medline(lcolor(gs9)) ///
    marker(1, msymbol(circle_hollow) mcolor(gs12)) ///
    yline(0,   lcolor(gs12) lpattern(shortdash)) ///
    yline(4.3, lcolor(red)) ///
    ylabel(-20 0 4.3 20 40, glcolor(ltbluishgray)) ///
    ytitle("Estimate of the beauty effect (%)") ///
    graphregion(color(ltbluishgray)) ///
    xsize(2.5) ysize(4) scale(0.55) ///
    saving(studies, replace)

* Horizontal box plot of the raw estimates within (-20, 40), one box per country,
* ordered by gdp. The pooled "US & Canada" category is left out. Saved as countries.gph.
graph hbox premium if country != "US & Canada" & premium < 40 & premium > -20, ///
    over(country, sort(gdp)) ///
    box(1, lcolor(black) fcolor(none)) ///
    medline(lcolor(gs9)) ///
    marker(1, msymbol(circle_hollow) mcolor(gs12)) ///
    yline(0,   lcolor(gs12) lpattern(shortdash)) ///
    yline(4.3, lcolor(red)) ///
    ylabel(-20 0 4.3 20 40, glcolor(ltbluishgray)) ///
    ytitle("Estimate of the beauty effect (%)") ///
    graphregion(color(ltbluishgray)) ///
    xsize(6) ysize(4) scale(0.8) ///
    saving(countries, replace)

******************************************************************************************
* 4. PUBLICATION BIAS – FUNNEL PLOTS
******************************************************************************************/

* Funnel plot, full sample: precision (1/SE of the raw estimate) against the raw estimate.
* Display is restricted to precision below 10 and estimates inside (-20, 40).
* Reference lines: dashed grey at 0 and dotted red at 4.3.
* Fix: the pattern was spelled "dott", which is not a linepatternstyle; the valid name for
* a dotted line is "dot".
twoway scatter precision_premium premium ///
    if precision_premium < 10 & premium > -20 & premium < 40, ///
    xlab(-20 0 4.3 20 40, nogrid labcolor(black)) ///
    xline(0,   lcolor(gs12) lpattern(shortdash))  ///
    xline(4.3, lpattern(dot) lcolor(red))         ///
    xtitle("Estimate of the beauty premium") ///
    ylabel(, glcolor(ltbluishgray)) ///
    ytitle("Precision of the estimate (1/SE)") ///
    msymbol(smcircle_hollow) ///
    graphregion(color(ltbluishgray)) ///
    saving(funnel_all, replace)

* Funnel plot with sex-worker estimates highlighted.
* Plot 1: estimates with prostitutes != 1 (hollow circles).
* Plot 2: estimates with prostitutes == 1 (large red X markers).
* Same display window as the full-sample funnel: precision below 10, estimates in (-20, 40).
* Fixes: (a) the line pattern "dott" is not a linepatternstyle; the valid name is "dot";
*        (b) the x-axis label option was repeated verbatim on the second plot, so it is
*            now stated once for the whole graph.
twoway ///
 (scatter precision_premium premium if precision_premium < 10 & ///
     premium > -20 & premium < 40 & prostitutes != 1, ///
     msymbol(Oh)) ///
 (scatter precision_premium premium if precision_premium < 10 & ///
     premium > -20 & premium < 40 & prostitutes == 1, ///
     msymbol(lgx) color(red)), ///
 xline(0, lcolor(gs12) lpattern(shortdash)) ///
 xline(4.3, lpattern(dot) lcolor(red)) ///
 xlab(-20 0 4.3 20 40, nogrid labcolor(black)) ///
 legend(label(1 "Sample without sex workers") label(2 "Sex workers") ///
        ring(0) position(2) bmargin(medium) rows(2) region(lstyle(none))) ///
 xtitle("Estimate of the beauty premium") ///
 ytitle("Precision of the estimate (1/SE)") ///
 graphregion(color(ltbluishgray)) ///
 saving(funnel, replace)

******************************************************************************************
* 5. PUBLICATION BIAS – FAT–PET (Stanley, 2005)
******************************************************************************************/

* Declare the panel identifier used by the -xtreg- fixed- and between-effects fits.
xtset study_id

* FAT-PET, full sample, inference clustered by study.
* Stored columns, in order:
*   1. unweighted fit via -ivreg2-         2. study fixed effects     3. between effects
*   4. weighted by weight^2 (weight = 1/n_obs)                        5. weighted by 1/SE^2
* After each -ivreg2- fit, the slope and the intercept are tested with -boottest-.
local spec premium_w se_premium_w

eststo: ivreg2 `spec', cluster(study_id)
foreach coef in se_premium_w _cons {
    boottest `coef', nograph
}
eststo: xtreg `spec', fe vce(cluster study_id)
eststo: xtreg `spec', be
eststo: ivreg2 `spec' [pweight = weight*weight], cluster(study_id)
foreach coef in se_premium_w _cons {
    boottest `coef', nograph
}
eststo: ivreg2 `spec' [pweight = 1/(se_premium_w*se_premium_w)], cluster(study_id)
foreach coef in se_premium_w _cons {
    boottest `coef', nograph
}

* Write the five stored columns to LaTeX, then empty the estimates store.
esttab using table_bias_CLstudy.tex, replace se booktabs compress ///
       star(\sym{*} 0.10 \sym{**} 0.05 \sym{***} 0.01) ///
       title(FAT-PET premium (OLS, FE, BE, wNOBS, wSE) \label{tab:fatpet})
eststo clear

* FAT-PET, full sample, inference clustered by database_id instead of study_id.
* Stored columns, in order:
*   1. unweighted fit via -ivreg2-
*   2. study fixed effects via -reghdfe- (absorbs study_id, clusters on database_id)
*   3. between-type fit: regression on study means, clustered on database_id
*   4. weighted by weight^2 (weight = 1/n_obs)
*   5. weighted by 1/SE^2
* Fix: this table used \label{tab:fatpet}, the label already used by the study-clustered
* table, which gives a multiply-defined LaTeX label when both are included. It now has its
* own label, following the tab:fatpet_<suffix> pattern of the later tables.
eststo: ivreg2 premium_w se_premium_w, cluster(database_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
eststo: reghdfe premium_w se_premium_w, absorb(study_id) ///
        vce(cluster database_id)
* Collapse to one row per study (means of effect and SE, first non-missing database_id),
* fit the regression on study means, then restore the estimate-level data.
preserve
collapse (mean) premium_w se_premium_w (firstnm) database_id, by(study_id)
eststo: regress premium_w se_premium_w, vce(cluster database_id)
restore
eststo: ivreg2 premium_w se_premium_w [pweight = weight*weight], ///
        cluster(database_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
eststo: ivreg2 premium_w se_premium_w ///
        [pweight = 1/(se_premium_w*se_premium_w)], cluster(database_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
esttab using table_bias_CLdatabase.tex, se booktabs replace compress ///
       title(FAT-PET premium (OLS, FE, BE, wNOBS, wSE) \label{tab:fatpet_database}) ///
       star(\sym{*} 0.10 \sym{**} 0.05 \sym{***} 0.01)
eststo clear

* FAT-PET restricted to sex-worker estimates (prostitutes == 1), clustered by study.
* Stored columns: unweighted | study FE | between effects | weight^2 | 1/SE^2.
* After each -ivreg2- fit, the slope and the intercept are tested with -boottest-.
* Fix: the table reused \label{tab:fatpet}, which is also used by other FAT-PET tables
* (multiply-defined LaTeX label). It now has its own label.
eststo: ivreg2 premium_w se_premium_w if prostitutes == 1, ///
        cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
eststo: xtreg premium_w se_premium_w if prostitutes == 1, ///
        fe vce(cluster study_id)
eststo: xtreg premium_w se_premium_w if prostitutes == 1, be
eststo: ivreg2 premium_w se_premium_w [pweight = weight*weight] ///
        if prostitutes == 1, cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
eststo: ivreg2 premium_w se_premium_w ///
        [pweight = 1/(se_premium_w*se_premium_w)] ///
        if prostitutes == 1, cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
esttab using table_bias_prostitutes.tex, se booktabs replace compress ///
       title(FAT-PET prostitutes (OLS, FE, BE, wNOBS, wSE) \label{tab:fatpet_prostitutes}) ///
       star(\sym{*} 0.10 \sym{**} 0.05 \sym{***} 0.01)
eststo clear

* FAT-PET excluding sex-worker estimates (prostitutes == 0), clustered by study.
* Stored columns: unweighted | study FE | between effects | weight^2 | 1/SE^2.
* After each -ivreg2- fit, the slope and the intercept are tested with -boottest-.
* Fix: the table reused \label{tab:fatpet}, which is also used by other FAT-PET tables
* (multiply-defined LaTeX label). It now has its own label.
eststo: ivreg2 premium_w se_premium_w if prostitutes == 0, ///
        cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
eststo: xtreg premium_w se_premium_w if prostitutes == 0, ///
        fe vce(cluster study_id)
eststo: xtreg premium_w se_premium_w if prostitutes == 0, be
eststo: ivreg2 premium_w se_premium_w [pweight = weight*weight] ///
        if prostitutes == 0, cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
eststo: ivreg2 premium_w se_premium_w ///
        [pweight = 1/(se_premium_w*se_premium_w)] ///
        if prostitutes == 0, cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
esttab using table_bias_noprostitutes.tex, se booktabs replace compress ///
       title(FAT-PET without prostitutes (OLS, FE, BE, wNOBS, wSE) \label{tab:fatpet_noprostitutes}) ///
       star(\sym{*} 0.10 \sym{**} 0.05 \sym{***} 0.01)
eststo clear

* FAT-PET restricted to beauty-penalty estimates (beauty_penalty == 1), clustered by study.
* Stored columns: unweighted | study FE | between effects | weight^2 | 1/SE^2.
* After each -ivreg2- fit, the slope and the intercept are tested with -boottest-.
* Fix: the table reused \label{tab:fatpet}, which is also used by other FAT-PET tables
* (multiply-defined LaTeX label). It now has its own label.
eststo: ivreg2 premium_w se_premium_w if beauty_penalty == 1, ///
        cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
eststo: xtreg premium_w se_premium_w if beauty_penalty == 1, ///
        fe vce(cluster study_id)
eststo: xtreg premium_w se_premium_w if beauty_penalty == 1, be
eststo: ivreg2 premium_w se_premium_w [pweight = weight*weight] ///
        if beauty_penalty == 1, cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
eststo: ivreg2 premium_w se_premium_w ///
        [pweight = 1/(se_premium_w*se_premium_w)] ///
        if beauty_penalty == 1, cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
esttab using table_bias_penalty.tex, se booktabs replace compress ///
       title(FAT-PET penalty (OLS, FE, BE, wNOBS, wSE) \label{tab:fatpet_penalty}) ///
       star(\sym{*} 0.10 \sym{**} 0.05 \sym{***} 0.01)
eststo clear

* FAT-PET excluding beauty-penalty estimates (beauty_penalty == 0), clustered by study.
* Stored columns: unweighted | study FE | between effects | weight^2 | 1/SE^2.
* After each -ivreg2- fit, the slope and the intercept are tested with -boottest-.
* Fix: the table reused \label{tab:fatpet}, which is also used by other FAT-PET tables
* (multiply-defined LaTeX label). It now has its own label.
eststo: ivreg2 premium_w se_premium_w if beauty_penalty == 0, ///
        cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
eststo: xtreg premium_w se_premium_w if beauty_penalty == 0, ///
        fe vce(cluster study_id)
eststo: xtreg premium_w se_premium_w if beauty_penalty == 0, be
eststo: ivreg2 premium_w se_premium_w [pweight = weight*weight] ///
        if beauty_penalty == 0, cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
eststo: ivreg2 premium_w se_premium_w ///
        [pweight = 1/(se_premium_w*se_premium_w)] ///
        if beauty_penalty == 0, cluster(study_id)
        boottest se_premium_w, nograph
        boottest _cons,        nograph
esttab using table_bias_premium_only.tex, se booktabs replace compress ///
       title(FAT-PET premia without penalties (OLS, FE, BE, wNOBS, wSE) \label{tab:fatpet_premium_only}) ///
       star(\sym{*} 0.10 \sym{**} 0.05 \sym{***} 0.01)
eststo clear

* FAT-PET on the standardized effect (premium_sd_w on se_premium_sd_w), clustered by study.
* Stored columns: unweighted | study FE | between effects | weight^2 | 1/SE^2.
* After each -ivreg2- fit, the slope and the intercept are tested with -boottest-.
local spec_sd premium_sd_w se_premium_sd_w

eststo: ivreg2 `spec_sd', cluster(study_id)
foreach coef in se_premium_sd_w _cons {
    boottest `coef', nograph
}
eststo: xtreg `spec_sd', fe vce(cluster study_id)
eststo: xtreg `spec_sd', be
eststo: ivreg2 `spec_sd' [pweight = weight*weight], cluster(study_id)
foreach coef in se_premium_sd_w _cons {
    boottest `coef', nograph
}
eststo: ivreg2 `spec_sd' [pweight = 1/(se_premium_sd_w*se_premium_sd_w)], cluster(study_id)
foreach coef in se_premium_sd_w _cons {
    boottest `coef', nograph
}

* Write the five stored columns to LaTeX, then empty the estimates store.
esttab using table_bias_sd.tex, replace se booktabs compress ///
       star(\sym{*} 0.10 \sym{**} 0.05 \sym{***} 0.01) ///
       title(FAT-PET standardized effect (OLS, FE, BE, wNOBS, wSE) \label{tab:fatpet_sd})
eststo clear

* FAT-PET on estimates flagged cognitive_measured == 1 or quasi_experimental_method == 1,
* clustered by study.
* Stored columns: unweighted | study FE | between effects | weight^2 | 1/SE^2.
* After each -ivreg2- fit, the slope and the intercept are tested with -boottest-.
local subset "(cognitive_measured == 1 | quasi_experimental_method == 1)"

eststo: ivreg2 premium_w se_premium_w if `subset', cluster(study_id)
foreach coef in se_premium_w _cons {
    boottest `coef', nograph
}
eststo: xtreg premium_w se_premium_w if `subset', fe vce(cluster study_id)
eststo: xtreg premium_w se_premium_w if `subset', be
eststo: ivreg2 premium_w se_premium_w [pweight = weight*weight] ///
        if `subset', cluster(study_id)
foreach coef in se_premium_w _cons {
    boottest `coef', nograph
}
eststo: ivreg2 premium_w se_premium_w [pweight = 1/(se_premium_w*se_premium_w)] ///
        if `subset', cluster(study_id)
foreach coef in se_premium_w _cons {
    boottest `coef', nograph
}

* Write the five stored columns to LaTeX, then empty the estimates store.
esttab using table_bias_quasi.tex, replace se booktabs compress ///
       star(\sym{*} 0.10 \sym{**} 0.05 \sym{***} 0.01) ///
       title(FAT-PET premia – quasi / cognitive (OLS, FE, BE, wNOBS, wSE) \label{tab:fatpet_quasi})
eststo clear

******************************************************************************************
* 5b. PUBLICATION BIAS – PET-PEESE (Stanley, 2009)
******************************************************************************************/

* PET: regress the effect on its standard error, weighting by 1/SE^2 and clustering by
* study. The intercept, its SE and t-ratio are kept as scalars, together with the
* two-sided 5% critical value of the t distribution on the residual degrees of freedom.
regress premium_w se_premium_w [aweight = 1/se_premium_w2], vce(cluster study_id)
scalar PET_b    = _b[_cons]
scalar PET_se   = _se[_cons]
scalar PET_t    = PET_b / PET_se
scalar PET_df   = e(df_r)
scalar PET_crit = invttail(PET_df, 0.025)

display "PET intercept = " PET_b "   (SE = " PET_se ", t = " PET_t ")"

* PEESE: same weights and clustering, with the squared standard error as regressor.
regress premium_w se_premium_w2 [aweight = 1/se_premium_w2], vce(cluster study_id)
scalar PEESE_b  = _b[_cons]
scalar PEESE_se = _se[_cons]

display "PEESE intercept = " PEESE_b "   (SE = " PEESE_se ")"

* Decision rule: when the PET intercept exceeds the critical value in absolute terms,
* the PEESE intercept is reported; otherwise the PET intercept is reported.
local pet_rejects = abs(PET_t) > PET_crit
if `pet_rejects' {
    display "=> PET is significant → report PEESE"
}
else {
    display "=> PET is NOT significant → report PET"
}
scalar FINAL_b  = cond(`pet_rejects', PEESE_b,  PET_b)
scalar FINAL_se = cond(`pet_rejects', PEESE_se, PET_se)

* 95% confidence interval around the reported estimate (uses the PET critical value)
scalar FINAL_lo = FINAL_b - PET_crit*FINAL_se
scalar FINAL_hi = FINAL_b + PET_crit*FINAL_se

display "Final PET-PEESE estimate = " FINAL_b " (SE: " FINAL_se ")  [95% CI: " FINAL_lo ", " FINAL_hi "]"
	
******************************************************************************************
* 6. PUBLICATION BIAS – WAAP (Ioannidis et al., 2017)
******************************************************************************************/

* WAAP recipe, repeated for each sample:
*   1. inverse-variance weighted mean of the effect;
*   2. SE threshold = |weighted mean| / 2.8, stored as the constant variable waapbound_<tag>;
*   3. no-constant regression of tstat_premium on precision, using only estimates whose
*      SE lies below the threshold.
* NOTE(review): tstat_premium is not created in this script; presumably it is read from
* beauty.xlsx. Confirm.
local ivw "[aweight = 1/(se_premium_w*se_premium_w)]"

* Full sample
summarize premium_w `ivw'
generate waapbound_premium = abs(r(mean))/2.8
regress tstat_premium precision_premium_w if se_premium_w < waapbound_premium, noconstant

* Subsamples, in order: sex workers only, no sex workers, penalties only, no penalties
local keep_prostitutes   "prostitutes == 1"
local keep_noprostitutes "prostitutes != 1"
local keep_penalty       "beauty_penalty == 1"
local keep_pure          "beauty_penalty == 0"
foreach tag in prostitutes noprostitutes penalty pure {
    local keep `keep_`tag''
    summarize premium_w if `keep' `ivw'
    generate waapbound_`tag' = abs(r(mean))/2.8
    regress tstat_premium precision_premium_w if se_premium_w < waapbound_`tag' & `keep', noconstant
}

* Standardized effect (threshold and precision come from the standardized SE)
summarize premium_sd_w [aweight = 1/(se_premium_sd_w*se_premium_sd_w)]
generate waapbound_premium_sd = abs(r(mean))/2.8
regress tstat_premium precision_premium_sd_w if se_premium_sd_w < waapbound_premium_sd, noconstant

* Cognitive skill objectively measured or quasi-experimental
local keep "(cognitive_measured == 1 | quasi_experimental_method == 1)"
summarize premium_w if `keep' `ivw'
generate waapbound_quasi = abs(r(mean))/2.8
regress tstat_premium precision_premium_w if se_premium_w < waapbound_quasi & `keep', noconstant

******************************************************************************************
* 7. PUBLICATION BIAS – ENDOGENOUS KINK (Bom & Rachinger, 2020)
******************************************************************************************/

capture program drop ek_kink
program define ek_kink, rclass
    // Endogenous-kink (EK) meta-regression of Bom & Rachinger (2020).
    //
    // Usage:
    //     ek_kink <effect> <se> [if] [in], LABEL(string)
    //
    // Arguments:
    //     <effect>  numeric variable holding the effect estimates
    //     <se>      numeric variable holding their standard errors
    //     LABEL()   free text printed in the results header
    //
    // Observations with a missing effect or SE are dropped by marksample.
    //
    // Returned results (r-class scalars):
    //     r(N)          number of observations in the estimation sample
    //     r(a1)         estimated kink (SE threshold below which no bias is assumed)
    //     r(alpha1)     coefficient on 1/SE  (bias-corrected mean effect)
    //     r(se_alpha1)  its standard error
    //     r(delta)      coefficient on the publication-bias term
    //     r(se_delta)   its standard error
    //   r(delta) and r(se_delta) are missing when the kink lies at or above the
    //   largest SE, because the model then contains no bias term.
    //
    // Errors:
    //     2000 ("no observations") when the selected sample is empty.
    syntax varlist(min=2 max=2 numeric) [if] [in], LABEL(string)

    marksample touse
    tokenize `varlist'
    local y  `1'
    local se `2'

    tempvar bs sebs ones sebs2 wis bs_sebs ones_sebs sebs_a1 pubbias

    quietly {
        count if `touse'
        local M = r(N)
        if `M' == 0 {
            // error (rather than a bare exit) also prints "no observations"
            error 2000
        }

        // Working copies restricted to the estimation sample
        gen `bs'   = `y'  if `touse'
        gen `sebs' = `se' if `touse'
        gen `ones' = 1    if `touse'

        // Range of the standard errors: the kink is compared against it below
        summarize `sebs' if `touse'
        local se_min = r(min)
        local se_max = r(max)

        // Everything is divided by SE so plain OLS acts as inverse-variance WLS
        gen `sebs2'     = `sebs'^2 if `touse'
        gen `wis'       = `ones'/`sebs2' if `touse'
        gen `bs_sebs'   = `bs'/`sebs'    if `touse'
        gen `ones_sebs' = `ones'/`sebs'  if `touse'

        summarize `wis' if `touse'
        local wis_sum = r(sum)

        // Step 1a: linear (PET-type) regression, effect/SE on 1/SE and 1
        regress `bs_sebs' `ones_sebs' `ones' if `touse', noc
        local t1_linreg = _b[`ones_sebs'] / _se[`ones_sebs']
        local b_lin     = _b[`ones_sebs']
        local Q1_lin    = e(rss)

        // Step 1b: quadratic (PEESE-type) regression, effect/SE on 1/SE and SE
        regress `bs_sebs' `ones_sebs' `sebs' if `touse', noc
        local b_sq  = _b[`ones_sebs']
        local Q1_sq = e(rss)

        // Use the quadratic fit when the linear mean effect is significant
        // at the 5% level, otherwise the linear fit
        local abs_t1 = abs(`t1_linreg')
        if `abs_t1' > invt(`M' - 2, 0.975) {
            local combreg = `b_sq'
            local Q1      = `Q1_sq'
        }
        else {
            local combreg = `b_lin'
            local Q1      = `Q1_lin'
        }

        // Step 2: heterogeneity variance, truncated at zero.
        // e(df_m) is taken from the regression run last (step 1b above).
        local sigh2hat = max(0, `M' * ((`Q1'/(`M' - e(df_m) - 1)) - 1) / `wis_sum')
        local sighhat  = sqrt(`sigh2hat')

        // Step 3: kink location; zero when the first-stage effect is not
        // larger than 1.96 heterogeneity standard deviations
        if `combreg' > 1.96*`sighhat' {
            local a1 = (`combreg' - 1.96*`sighhat') * ///
                       (`combreg' + 1.96*`sighhat') / (2*1.96*`combreg')
        }
        else {
            local a1 = 0
        }

        // Defaults: stay missing for terms that are not estimated
        local b0_ek  .
        local b1_ek  .
        local sd0_ek .
        local sd1_ek .

        // Step 4: EK regression. The three cases below are exhaustive:
        // a kink exactly at the smallest SE is handled by the kink regression,
        // a kink exactly at the largest SE by the no-bias regression, so the
        // boundary cases no longer fall through with all results missing.
        if `a1' >= `se_min' & `a1' < `se_max' {
            // Kink inside the SE range: bias term is max(SE - a1, 0)/SE
            gen `sebs_a1' = `sebs' - `a1' if `touse' & `sebs' > `a1'
            replace `sebs_a1' = 0 if `touse' & `sebs' <= `a1'
            gen `pubbias' = `sebs_a1'/`sebs' if `touse'

            // bs/se = alpha1*(1/se) + delta*(se-a1)/se + e
            regress `bs_sebs' `ones_sebs' `pubbias' if `touse', noc
            local b0_ek  = _b[`ones_sebs']
            local b1_ek  = _b[`pubbias']
            local sd0_ek = _se[`ones_sebs']
            local sd1_ek = _se[`pubbias']
        }
        else if `a1' < `se_min' {
            // Kink below every SE: the constant plays the role of the bias term
            regress `bs_sebs' `ones_sebs' `ones' if `touse', noc
            local b0_ek  = _b[`ones_sebs']
            local b1_ek  = _b[`ones']
            local sd0_ek = _se[`ones_sebs']
            local sd1_ek = _se[`ones']
        }
        else {
            // Kink at or above every SE: no publication-bias term
            regress `bs_sebs' `ones_sebs' if `touse', noc
            local b0_ek  = _b[`ones_sebs']
            local sd0_ek = _se[`ones_sebs']
        }
    }

    // Report outside the quietly block so the table is always shown
    di as text "--------------------------------------------------"
    di as text "Endogenous kink – `label'"
    di as text "  N                        = " %9.0f `M'
    di as text "  a1 (kink)                = " %9.4f `a1'
    di as text "  alpha1 (mean effect)     = " %9.4f `b0_ek'
    di as text "  SE(alpha1)               = " %9.4f `sd0_ek'
    di as text "  delta (publication bias) = " %9.4f `b1_ek'
    di as text "  SE(delta)                = " %9.4f `sd1_ek'

    return scalar N         = `M'
    return scalar a1        = `a1'
    return scalar alpha1    = `b0_ek'
    return scalar se_alpha1 = `sd0_ek'
    return scalar delta     = `b1_ek'
    return scalar se_delta  = `sd1_ek'
end

* Endogenous-kink estimates: full sample first, then each subsample in turn.
* Every call prints its own table and leaves its results in r().

* (1) all estimates
ek_kink premium_w se_premium_w, label("All estimates")

* (2) estimates for sex workers
ek_kink premium_w se_premium_w if prostitutes == 1, label("Sex workers")

* (3) estimates excluding sex workers
ek_kink premium_w se_premium_w if prostitutes == 0, label("No sex workers")

* (4) beauty penalties
ek_kink premium_w se_premium_w if beauty_penalty == 1, label("Beauty penalties")

* (5) beauty premia (i.e. not penalties)
ek_kink premium_w se_premium_w if beauty_penalty == 0, label("Beauty premia")

* (6) standardized effect and its standard error
ek_kink premium_sd_w se_premium_sd_w, label("Standardized effect")

* (7) cognitive skill objectively measured, or quasi-experimental design
ek_kink premium_w se_premium_w ///
    if (cognitive_measured == 1 | quasi_experimental_method == 1), ///
    label("Cognitive skill objectively measured or quasi-experimental")

******************************************************************************************
* 8. PUBLICATION BIAS – CALIPER TEST (Gerber & Malhotra, 2008)
******************************************************************************************/

* Distribution of t-statistics used for the caliper tests.
* Overlays a 65-bin histogram and a kernel density of tstat_premium, both
* restricted to -5 < t < 10. A dashed grey line marks 0 and red lines mark
* 1.65 and 2.58. The graph is saved as caliper.gph (overwritten if present).
twoway ///
    (histogram tstat_premium if tstat_premium > -5 & tstat_premium < 10, ///
        bin(65) fcolor(gs14) lstyle(thin) ///
        xtitle("t-statistics of the estimate of the beauty effect") ///
        ytitle("Density") ///
        xline(0, lcolor(gs12) lpattern(shortdash)) ///
        xline(1.65 2.58, lcolor(red)) ///
        xlabel(-5 0 1.65 2.58 5 10) ///
        ylabel(, glcolor(ltbluishgray)) ///
        graphregion(color(ltbluishgray))) ///
    (kdensity tstat_premium if tstat_premium > -5 & tstat_premium < 10, ///
        lcolor(navy)), ///
    legend(off) saving(caliper, replace)

* Caliper tests around the cut-offs 0, 1.96 and 2.58.
* For each cut-off, `significant' flags t-statistics above it. Within a
* symmetric window of half-width 0.05, 0.10, ..., 0.50 the share of flagged
* estimates (the constant of an intercept-only regression) is compared with 0.5.
local first_cut 1
foreach cut in 0 1.96 2.58 {
    if `first_cut' {
        * the indicator is created once and overwritten for later cut-offs
        gen significant = (tstat_premium > `cut')
        local first_cut 0
    }
    else {
        replace significant = (tstat_premium > `cut')
    }

    forvalues pct = 5(5)50 {
        local lo = `cut' - `pct'/100
        local hi = `cut' + `pct'/100
        reg significant if tstat_premium > `lo' & tstat_premium < `hi'
        lincom _cons - 0.5
    }
}

******************************************************************************************
* 9. HETEROGENEITY – OLS & BEST-PRACTICE EFFECTS
******************************************************************************************/

* Detailed summaries of the three continuous study characteristics
foreach v in data_year impact_factor citations {
    summarize `v', detail
}

* Full list of right-hand-side variables (standard error plus all moderators),
* shared by the correlation matrix, the collinearity diagnostics and the
* full and stepwise regressions below.
local het_rhs ///
    se_premium_w interviewer_rated_beauty photo_rated_beauty ///
    software_rated_beauty dummy_beauty beauty_penalty number_of_raters ///
    salary study_outcomes teaching_research_outcomes athletic_success ///
    electoral_success male_subjects female_subjects age_subject ///
    high_skilled_workers prostitutes interaction_intensity ///
    output_measurability appearance_spending western_culture ///
    panel_data data_year ols_method iv_method quasi_experimental_method ///
    ageexp_control education_control ethnicity_control cognitive_skill_control ///
    noncognitive_skill_control physicality_control published_study ///
    impact_factor citations

* Pairwise correlations (effect included)
correlate premium_w `het_rhs'

* Collinearity diagnostics on the same variable set
collin premium_w `het_rhs'

* Full specification, standard errors clustered by study
ivreg2 premium_w `het_rhs', cluster(study_id)

* Backward stepwise selection: terms with p >= .05 are removed
stepwise, pr(.05): regress premium_w `het_rhs', cluster(study_id)

* Reduced specification, all estimates
ivreg2 premium_w se_premium_w prostitutes panel_data cognitive_skill_control ///
    impact_factor, cluster(study_id)

* Reduced specification, beauty premia only (beauty_penalty == 0)
ivreg2 premium_w se_premium_w photo_rated_beauty prostitutes ///
    quasi_experimental_method cognitive_skill_control impact_factor ///
    if beauty_penalty == 0, cluster(study_id)

* Best-practice fitted effects.
* Every model regresses premium_w on the four core regressors below, plus at
* most one subgroup moderator, with standard errors clustered by study.
* The fitted effect sets se_premium_w = 0, quasi_experimental_method = 0.5
* and cognitive_skill_control = 0.5; prostitutes = 0 except for sex workers.
* NOTE(review): the baseline model uses ivreg2 while all subgroup models use
* the legacy ivreg command; the commands are kept exactly as they were -
* confirm that the difference is intended.
local bp_rhs se_premium_w prostitutes quasi_experimental_method cognitive_skill_control
local bp_fit _cons + se_premium_w*0 + prostitutes*0 + quasi_experimental_method*0.5 + cognitive_skill_control*0.5

* Baseline
ivreg2 premium_w `bp_rhs', cluster(study_id)
lincom `bp_fit'

* Athletes, then politicians (moderator switched to 1)
foreach grp in athletic_success electoral_success {
    ivreg premium_w `bp_rhs' `grp', cluster(study_id)
    lincom `bp_fit' + `grp'*1
}

* Sex workers: no extra moderator, prostitutes set to 1 instead of 0
ivreg premium_w `bp_rhs', cluster(study_id)
lincom _cons + se_premium_w*0 + prostitutes*1 ///
    + quasi_experimental_method*0.5 + cognitive_skill_control*0.5

* Students, teachers & scientists, male, female, earnings outcome
foreach grp in study_outcomes teaching_research_outcomes male_subjects female_subjects salary {
    ivreg premium_w `bp_rhs' `grp', cluster(study_id)
    lincom `bp_fit' + `grp'*1
}

* Interpersonal intensity, evaluated at 0.22, 0.55 and 0.84
ivreg premium_w `bp_rhs' interaction_intensity, cluster(study_id)
foreach lvl in 0.22 0.55 0.84 {
    lincom `bp_fit' + interaction_intensity*`lvl'
}

* Output measurability, evaluated at 0.37, 0.60 and 0.78
ivreg premium_w `bp_rhs' output_measurability, cluster(study_id)
foreach lvl in 0.37 0.60 0.78 {
    lincom `bp_fit' + output_measurability*`lvl'
}
	
******************************************************************************************
* 10. Export dataset for BMA in R
******************************************************************************************/

* Columns of the exported workbook, in the order they should appear.
* The list is assembled group by group to keep each line short.
local bma_vars study_id study premium_w se_premium_w
local bma_vars `bma_vars' interviewer_rated_beauty photo_rated_beauty software_rated_beauty
local bma_vars `bma_vars' dummy_beauty beauty_penalty number_of_raters
local bma_vars `bma_vars' salary study_outcomes teaching_research_outcomes
local bma_vars `bma_vars' athletic_success electoral_success
local bma_vars `bma_vars' male_subjects female_subjects age_subject high_skilled_workers prostitutes
local bma_vars `bma_vars' interaction_intensity output_measurability appearance_spending
local bma_vars `bma_vars' western_culture panel_data data_year
local bma_vars `bma_vars' ols_method iv_method quasi_experimental_method
local bma_vars `bma_vars' ageexp_control education_control ethnicity_control
local bma_vars `bma_vars' cognitive_skill_control noncognitive_skill_control physicality_control
local bma_vars `bma_vars' published_study impact_factor citations
local bma_vars `bma_vars' cognitive_measured nobs
local bma_vars `bma_vars' premium_sd se_premium_sd

* Write only these columns to beauty_R.xlsx, with variable names in the first
* row; an existing file of that name is overwritten.
export excel `bma_vars' using "beauty_R.xlsx", firstrow(variables) replace

******************************************************************************************
* END
******************************************************************************************/

* Close the log file, then drop the dataset from memory.
log close
clear
