// Directory for Windows users:
// cd %systemdrive%\Users\%username%\Downloads\Codes\
// Directory for macOS users:
// cd ~/Downloads/Codes/

// Start from an empty session, then pin the interpreter version.
clear all
version 17
// Remember the launch directory; later directory changes are built from it.
local root_dir "`c(pwd)'"


// ============================================================================
// PART 1: MISTAKE (EXPERIMENTS DATA)
// ============================================================================
// Load the experiments data from the 04_Experiments folder.
cd "`root_dir'/04_Experiments"
use "Datasets/main.dta", clear
// Outcome for Part 1: one minus is_correct.
generate mistake = 1 - is_correct

// --- Panel A: mistake vs. length and quantity ---
// OLS of mistake on length and quantity with question and user fixed effects
// and heteroskedasticity-robust standard errors.
reg mistake length quantity i.id_question i.user, robust
// Keep the estimates at full numeric precision; routing them through a
// formatted display would round them before they are reused below.
local r2_a_mis_q = e(r2)
local b_len_mis_q = _b[length]
local b_quan_mis_q = _b[quantity]
// Two-sided p-values from the t distribution on the residual degrees of freedom.
scalar p_len_mis_q = 2*ttail(e(df_r), abs(_b[length]/_se[length]))
scalar p_quan_mis_q = 2*ttail(e(df_r), abs(_b[quantity]/_se[quantity]))
// Standard deviations on the estimation sample, used to standardize the slopes.
sum mistake if e(sample)
local sd_y_mis_q = r(sd)
sum length if e(sample)
local sd_x1_mis_q = r(sd)
sum quantity if e(sample)
local sd_x2_mis_q = r(sd)
scalar std_coef_len_mis_q = (`b_len_mis_q' * `sd_x1_mis_q') / `sd_y_mis_q'
scalar std_coef_quan_mis_q = (`b_quan_mis_q' * `sd_x2_mis_q') / `sd_y_mis_q'
// Covariances are restricted to the estimation sample so that they use the
// same observations as the coefficients, R-squared and standard deviations.
corr mistake length quantity if e(sample), covariance
matrix C_mis_q = r(C)
scalar var_y_mis_q = C_mis_q[1,1]
scalar cov_y_x1_mis_q = C_mis_q[1,2]
scalar cov_y_x2_mis_q = C_mis_q[1,3]
// Share of R-squared attributed to each regressor:
// b * cov(y, x) / var(y), divided by the regression R-squared.
scalar r2_cont_len_mis_q = ((`b_len_mis_q' * cov_y_x1_mis_q) / var_y_mis_q) / `r2_a_mis_q'
scalar r2_cont_quan_mis_q = ((`b_quan_mis_q' * cov_y_x2_mis_q) / var_y_mis_q) / `r2_a_mis_q'

// --- Panel B: mistake vs. length and potential ---
// OLS of mistake on length and potentialvolume with question and user fixed
// effects and heteroskedasticity-robust standard errors.
reg mistake length potentialvolume i.id_question i.user, robust
// Keep the estimates at full numeric precision; routing them through a
// formatted display would round them before they are reused below.
local r2_a_mis_p = e(r2)
local b_len_mis_p = _b[length]
local b_pot_mis_p = _b[potentialvolume]
// Two-sided p-values from the t distribution on the residual degrees of freedom.
scalar p_len_mis_p = 2*ttail(e(df_r), abs(_b[length]/_se[length]))
scalar p_pot_mis_p = 2*ttail(e(df_r), abs(_b[potentialvolume]/_se[potentialvolume]))
// Standard deviations on the estimation sample, used to standardize the slopes.
sum mistake if e(sample)
local sd_y_mis_p = r(sd)
sum length if e(sample)
local sd_x1_mis_p = r(sd)
sum potentialvolume if e(sample)
local sd_x2_mis_p = r(sd)
scalar std_coef_len_mis_p = (`b_len_mis_p' * `sd_x1_mis_p') / `sd_y_mis_p'
scalar std_coef_pot_mis_p = (`b_pot_mis_p' * `sd_x2_mis_p') / `sd_y_mis_p'
// Covariances are restricted to the estimation sample so that they use the
// same observations as the coefficients, R-squared and standard deviations.
corr mistake length potentialvolume if e(sample), covariance
matrix C_mis_p = r(C)
scalar var_y_mis_p = C_mis_p[1,1]
scalar cov_y_x1_mis_p = C_mis_p[1,2]
scalar cov_y_x2_mis_p = C_mis_p[1,3]
// Share of R-squared attributed to each regressor:
// b * cov(y, x) / var(y), divided by the regression R-squared.
scalar r2_cont_len_mis_p = ((`b_len_mis_p' * cov_y_x1_mis_p) / var_y_mis_p) / `r2_a_mis_p'
scalar r2_cont_pot_mis_p = ((`b_pot_mis_p' * cov_y_x2_mis_p) / var_y_mis_p) / `r2_a_mis_p'


// ============================================================================
// PART 2: TIME (EXPERIMENTS DATA)
// ============================================================================
// Reload the experiments data; Part 2 works on its own filtered copy.
cd "`root_dir'/04_Experiments"
use "Datasets/main.dta", clear
// Replace time by 60 * minute component + second component of the original
// variable. NOTE(review): any hour component is not included - confirm that
// no response time reaches one hour.
generate time_sec = 60*mm(time) + ss(time)
drop time
rename time_sec time
// Drop rows with is_correct equal to 0 and rows with time of 579 or more.
// Missing time values compare as larger than any number, so they are dropped too.
// NOTE(review): the origin of the 579 cutoff is not visible here - confirm.
drop if is_correct == 0 | time >= 579

// --- Panel A: time vs. length and quantity ---
// OLS of time on length and quantity with question and user fixed effects
// and heteroskedasticity-robust standard errors.
reg time length quantity i.id_question i.user, robust
// Keep the estimates at full numeric precision; routing them through a
// formatted display would round them before they are reused below.
local r2_a_time_q = e(r2)
local b_len_time_q = _b[length]
local b_quan_time_q = _b[quantity]
// Two-sided p-values from the t distribution on the residual degrees of freedom.
scalar p_len_time_q = 2*ttail(e(df_r), abs(_b[length]/_se[length]))
scalar p_quan_time_q = 2*ttail(e(df_r), abs(_b[quantity]/_se[quantity]))
// Standard deviations on the estimation sample, used to standardize the slopes.
sum time if e(sample)
local sd_y_time_q = r(sd)
sum length if e(sample)
local sd_x1_time_q = r(sd)
sum quantity if e(sample)
local sd_x2_time_q = r(sd)
scalar std_coef_len_time_q = (`b_len_time_q' * `sd_x1_time_q') / `sd_y_time_q'
scalar std_coef_quan_time_q = (`b_quan_time_q' * `sd_x2_time_q') / `sd_y_time_q'
// Covariances are restricted to the estimation sample so that they use the
// same observations as the coefficients, R-squared and standard deviations.
corr time length quantity if e(sample), covariance
matrix C_time_q = r(C)
scalar var_y_time_q = C_time_q[1,1]
scalar cov_y_x1_time_q = C_time_q[1,2]
scalar cov_y_x2_time_q = C_time_q[1,3]
// Share of R-squared attributed to each regressor:
// b * cov(y, x) / var(y), divided by the regression R-squared.
scalar r2_cont_len_time_q = ((`b_len_time_q' * cov_y_x1_time_q) / var_y_time_q) / `r2_a_time_q'
scalar r2_cont_quan_time_q = ((`b_quan_time_q' * cov_y_x2_time_q) / var_y_time_q) / `r2_a_time_q'

// --- Panel B: time vs. length and potential ---
// OLS of time on length and potentialvolume with question and user fixed
// effects and heteroskedasticity-robust standard errors.
reg time length potentialvolume i.id_question i.user, robust
// Keep the estimates at full numeric precision; routing them through a
// formatted display would round them before they are reused below.
local r2_a_time_p = e(r2)
local b_len_time_p = _b[length]
local b_pot_time_p = _b[potentialvolume]
// Two-sided p-values from the t distribution on the residual degrees of freedom.
scalar p_len_time_p = 2*ttail(e(df_r), abs(_b[length]/_se[length]))
scalar p_pot_time_p = 2*ttail(e(df_r), abs(_b[potentialvolume]/_se[potentialvolume]))
// Standard deviations on the estimation sample, used to standardize the slopes.
sum time if e(sample)
local sd_y_time_p = r(sd)
sum length if e(sample)
local sd_x1_time_p = r(sd)
sum potentialvolume if e(sample)
local sd_x2_time_p = r(sd)
scalar std_coef_len_time_p = (`b_len_time_p' * `sd_x1_time_p') / `sd_y_time_p'
scalar std_coef_pot_time_p = (`b_pot_time_p' * `sd_x2_time_p') / `sd_y_time_p'
// Covariances are restricted to the estimation sample so that they use the
// same observations as the coefficients, R-squared and standard deviations.
corr time length potentialvolume if e(sample), covariance
matrix C_time_p = r(C)
scalar var_y_time_p = C_time_p[1,1]
scalar cov_y_x1_time_p = C_time_p[1,2]
scalar cov_y_x2_time_p = C_time_p[1,3]
// Share of R-squared attributed to each regressor:
// b * cov(y, x) / var(y), divided by the regression R-squared.
scalar r2_cont_len_time_p = ((`b_len_time_p' * cov_y_x1_time_p) / var_y_time_p) / `r2_a_time_p'
scalar r2_cont_pot_time_p = ((`b_pot_time_p' * cov_y_x2_time_p) / var_y_time_p) / `r2_a_time_p'


// ============================================================================
// PART 3: COST (EBA DATA)
// ============================================================================
// Load the EBA master data from the 03_EBA folder.
cd "`root_dir'/03_EBA"
use "Datasets/EBA_Master.dta", clear

// length is the sum of all *_tot token counts.
generate length = Attributes_tot + EconOp_tot + FctWords_tot + LegalRef_tot ///
    + LogicalConn_tot + MathOp_tot + Other_tot + RegOp_tot
// Operand and operator counts (unique and total).
generate operands_unique = Attributes_uniq + EconOp_uniq
generate operands_tot = Attributes_tot + EconOp_tot
generate operators_unique = LogicalConn_uniq + MathOp_uniq + RegOp_uniq
generate operators_tot = LogicalConn_tot + MathOp_tot + RegOp_tot
// Regressors used by the panels below.
generate potential = 2 + operands_unique
generate quantity = RegOp_tot
generate cyclomatic = LogicalConn_tot

// Give the three share columns a common stub so they can be stacked.
rename (share_large share_medium share_snci) (share1 share2 share3)
// One row per original row and share type (1, 2, 3 as numbered above).
reshape long share, ///
    i(id_name length operands_unique operands_tot operators_unique operators_tot RegOp_tot LogicalConn_tot Cells_tot) ///
    j(share_type)
// Dummies for share types 2 and 3; type 1 is the omitted category.
generate share_2 = (share_type == 2)
generate share_3 = (share_type == 3)

// --- Panel A: cost vs. length and quantity ---
// OLS of share on length and quantity, controlling for the share-type dummies,
// with heteroskedasticity-robust standard errors.
reg share share_2 share_3 length quantity, robust
// Keep the estimates at full numeric precision; routing them through a
// formatted display would round them before they are reused below.
local r2_a_cost_q = e(r2)
local b_len_cost_q = _b[length]
local b_quan_cost_q = _b[quantity]
// Two-sided p-values from the t distribution on the residual degrees of freedom.
scalar p_len_cost_q = 2*ttail(e(df_r), abs(_b[length]/_se[length]))
scalar p_quan_cost_q = 2*ttail(e(df_r), abs(_b[quantity]/_se[quantity]))
// Standard deviations on the estimation sample, used to standardize the slopes.
sum share if e(sample)
local sd_y_cost_q = r(sd)
sum length if e(sample)
local sd_x1_cost_q = r(sd)
sum quantity if e(sample)
local sd_x2_cost_q = r(sd)
scalar std_coef_len_cost_q = (`b_len_cost_q' * `sd_x1_cost_q') / `sd_y_cost_q'
scalar std_coef_quan_cost_q = (`b_quan_cost_q' * `sd_x2_cost_q') / `sd_y_cost_q'
// Covariances are restricted to the estimation sample so that they use the
// same observations as the coefficients, R-squared and standard deviations.
corr share length quantity if e(sample), covariance
matrix C_cost_q = r(C)
scalar var_y_cost_q = C_cost_q[1,1]
scalar cov_y_x1_cost_q = C_cost_q[1,2]
scalar cov_y_x2_cost_q = C_cost_q[1,3]
// Share of R-squared attributed to each regressor:
// b * cov(y, x) / var(y), divided by the regression R-squared.
scalar r2_cont_len_cost_q = ((`b_len_cost_q' * cov_y_x1_cost_q) / var_y_cost_q) / `r2_a_cost_q'
scalar r2_cont_quan_cost_q = ((`b_quan_cost_q' * cov_y_x2_cost_q) / var_y_cost_q) / `r2_a_cost_q'
// Print the inputs of the quantity contribution for inspection in the log.
di `b_quan_cost_q'
di cov_y_x2_cost_q
di var_y_cost_q
di `r2_a_cost_q'

// --- Panel B: cost vs. length and potential ---
// OLS of share on length and potential, controlling for the share-type dummies,
// with heteroskedasticity-robust standard errors.
reg share share_2 share_3 length potential, robust
// Keep the estimates at full numeric precision; routing them through a
// formatted display would round them before they are reused below.
local r2_a_cost_p = e(r2)
local b_len_cost_p = _b[length]
local b_pot_cost_p = _b[potential]
// Two-sided p-values from the t distribution on the residual degrees of freedom.
scalar p_len_cost_p = 2*ttail(e(df_r), abs(_b[length]/_se[length]))
scalar p_pot_cost_p = 2*ttail(e(df_r), abs(_b[potential]/_se[potential]))
// Standard deviations on the estimation sample, used to standardize the slopes.
sum share if e(sample)
local sd_y_cost_p = r(sd)
sum length if e(sample)
local sd_x1_cost_p = r(sd)
sum potential if e(sample)
local sd_x2_cost_p = r(sd)
scalar std_coef_len_cost_p = (`b_len_cost_p' * `sd_x1_cost_p') / `sd_y_cost_p'
scalar std_coef_pot_cost_p = (`b_pot_cost_p' * `sd_x2_cost_p') / `sd_y_cost_p'
// Covariances are restricted to the estimation sample so that they use the
// same observations as the coefficients, R-squared and standard deviations.
corr share length potential if e(sample), covariance
matrix C_cost_p = r(C)
scalar var_y_cost_p = C_cost_p[1,1]
scalar cov_y_x1_cost_p = C_cost_p[1,2]
scalar cov_y_x2_cost_p = C_cost_p[1,3]
// Share of R-squared attributed to each regressor:
// b * cov(y, x) / var(y), divided by the regression R-squared.
scalar r2_cont_len_cost_p = ((`b_len_cost_p' * cov_y_x1_cost_p) / var_y_cost_p) / `r2_a_cost_p'
scalar r2_cont_pot_cost_p = ((`b_pot_cost_p' * cov_y_x2_cost_p) / var_y_cost_p) / `r2_a_cost_p'


// ============================================================================
// PART 4: IMPORTANCE (EBA DATA)
// ============================================================================
// Load the regulators version of the EBA master data.
cd "`root_dir'/03_EBA"
use "Datasets/EBA_Master_Regulators.dta", clear

// length is the sum of all *_tot token counts.
generate length = Attributes_tot + EconOp_tot + FctWords_tot + LegalRef_tot ///
    + LogicalConn_tot + MathOp_tot + Other_tot + RegOp_tot
// Regressors used by the panels below.
generate operands_unique = Attributes_uniq + EconOp_uniq
generate potential = 2 + operands_unique
generate quantity = RegOp_tot
// Weighted mean rating: weights 3, 2, 1 and 0 on highly, important, less and not.
generate average = (3*highly + 2*important + 1*less) ///
    / (highly + important + less + not)

// --- Panel A: importance vs. length and quantity ---
// OLS of average on length and quantity with heteroskedasticity-robust
// standard errors.
reg average length quantity, robust
// Keep the estimates at full numeric precision; routing them through a
// formatted display would round them before they are reused below.
local r2_a_imp_q = e(r2)
local b_len_imp_q = _b[length]
local b_quan_imp_q = _b[quantity]
// Two-sided p-values from the t distribution on the residual degrees of freedom.
scalar p_len_imp_q = 2*ttail(e(df_r), abs(_b[length]/_se[length]))
scalar p_quan_imp_q = 2*ttail(e(df_r), abs(_b[quantity]/_se[quantity]))
// Standard deviations on the estimation sample, used to standardize the slopes.
sum average if e(sample)
local sd_y_imp_q = r(sd)
sum length if e(sample)
local sd_x1_imp_q = r(sd)
sum quantity if e(sample)
local sd_x2_imp_q = r(sd)
scalar std_coef_len_imp_q = (`b_len_imp_q' * `sd_x1_imp_q') / `sd_y_imp_q'
scalar std_coef_quan_imp_q = (`b_quan_imp_q' * `sd_x2_imp_q') / `sd_y_imp_q'
// Covariances are restricted to the estimation sample so that they use the
// same observations as the coefficients, R-squared and standard deviations.
corr average length quantity if e(sample), covariance
matrix C_imp_q = r(C)
scalar var_y_imp_q = C_imp_q[1,1]
scalar cov_y_x1_imp_q = C_imp_q[1,2]
scalar cov_y_x2_imp_q = C_imp_q[1,3]
// Share of R-squared attributed to each regressor:
// b * cov(y, x) / var(y), divided by the regression R-squared.
scalar r2_cont_len_imp_q = ((`b_len_imp_q' * cov_y_x1_imp_q) / var_y_imp_q) / `r2_a_imp_q'
scalar r2_cont_quan_imp_q = ((`b_quan_imp_q' * cov_y_x2_imp_q) / var_y_imp_q) / `r2_a_imp_q'

// --- Panel B: importance vs. length and potential ---
// OLS of average on length and potential with heteroskedasticity-robust
// standard errors.
reg average length potential, robust
// Keep the estimates at full numeric precision; routing them through a
// formatted display would round them before they are reused below.
local r2_a_imp_p = e(r2)
local b_len_imp_p = _b[length]
local b_pot_imp_p = _b[potential]
// Two-sided p-values from the t distribution on the residual degrees of freedom.
scalar p_len_imp_p = 2*ttail(e(df_r), abs(_b[length]/_se[length]))
scalar p_pot_imp_p = 2*ttail(e(df_r), abs(_b[potential]/_se[potential]))
// Standard deviations on the estimation sample, used to standardize the slopes.
sum average if e(sample)
local sd_y_imp_p = r(sd)
sum length if e(sample)
local sd_x1_imp_p = r(sd)
sum potential if e(sample)
local sd_x2_imp_p = r(sd)
scalar std_coef_len_imp_p = (`b_len_imp_p' * `sd_x1_imp_p') / `sd_y_imp_p'
scalar std_coef_pot_imp_p = (`b_pot_imp_p' * `sd_x2_imp_p') / `sd_y_imp_p'
// Covariances are restricted to the estimation sample so that they use the
// same observations as the coefficients, R-squared and standard deviations.
corr average length potential if e(sample), covariance
matrix C_imp_p = r(C)
scalar var_y_imp_p = C_imp_p[1,1]
scalar cov_y_x1_imp_p = C_imp_p[1,2]
scalar cov_y_x2_imp_p = C_imp_p[1,3]
// Share of R-squared attributed to each regressor:
// b * cov(y, x) / var(y), divided by the regression R-squared.
scalar r2_cont_len_imp_p = ((`b_len_imp_p' * cov_y_x1_imp_p) / var_y_imp_p) / `r2_a_imp_p'
scalar r2_cont_pot_imp_p = ((`b_pot_imp_p' * cov_y_x2_imp_p) / var_y_imp_p) / `r2_a_imp_p'


// ============================================================================
// PART 5: GENERATE LATEX TABLE (File writing logic)
// ============================================================================
// Writes the two-panel LaTeX table from the scalars computed in Parts 1-4.
// Each data row is assembled in the local macro row and then written as one line.
cd "`root_dir'"
// Release the handle if an earlier interrupted run left it open; otherwise
// the file open below fails because the handle name is already in use.
capture file close tablefile
file open tablefile using "06_Other/Output/Table9_automated.tex", write replace

// --- Write headers ---
file write tablefile "\begin{tabular}{l c c c c c c c c}" _n
file write tablefile "\multicolumn{9}{c}{Panel A - length and quantity} \\" _n
file write tablefile "" _n
file write tablefile "\toprule" _n
file write tablefile "" _tab "& \multicolumn{2}{c}{\textbf{mistake}}" _tab "& \multicolumn{2}{c}{time}" _tab "& \multicolumn{2}{c}{\textbf{cost}}" _tab "& \multicolumn{2}{c}{importance} \\" _n
file write tablefile "\midrule" _n
file write tablefile "Variable" _tab "& \textbf{length} & \textbf{quantity}" _tab "& length & quantity" _tab "& \textbf{length} & \textbf{quantity}" _tab "& length & quantity \\" _n

// --- Build and write Panel A rows ---
// Row 1: Standardized coeff.
local row = "Standardized coeff."
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(scalar(std_coef_len_mis_q), "%4.2f") + "} & \textbf{" + strofreal(scalar(std_coef_quan_mis_q), "%4.2f") + "}"
local row = `"`row'"' + char(9) + "& " + strofreal(scalar(std_coef_len_time_q), "%4.2f") + " & " + strofreal(scalar(std_coef_quan_time_q), "%4.2f") + " "
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(scalar(std_coef_len_cost_q), "%4.2f") + "} & \textbf{" + strofreal(scalar(std_coef_quan_cost_q), "%4.2f") + "}"
local row = `"`row'"' + char(9) + "& " + strofreal(scalar(std_coef_len_imp_q), "%4.2f") + " & " + strofreal(scalar(std_coef_quan_imp_q), "%4.2f") + " \\"
file write tablefile `"`row'"' _n

// Row 2: p-value
local row = "p-value"
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(scalar(p_len_mis_q), "%4.2f") + "} & \textbf{" + strofreal(scalar(p_quan_mis_q), "%4.2f") + "}"
local row = `"`row'"' + char(9) + "& " + strofreal(scalar(p_len_time_q), "%4.2f") + " & " + strofreal(scalar(p_quan_time_q), "%4.2f") + " "
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(scalar(p_len_cost_q), "%4.2f") + "} & \textbf{" + strofreal(scalar(p_quan_cost_q), "%4.2f") + "}"
local row = `"`row'"' + char(9) + "& " + strofreal(scalar(p_len_imp_q), "%4.2f") + " & " + strofreal(scalar(p_quan_imp_q), "%4.2f") + " \\"
file write tablefile `"`row'"' _n

// Row 3: Contribution to R2
// The label is written directly with backslash-escaped dollar signs. An
// unescaped dollar sign followed by R is read as a global macro reference and
// would be replaced by that macro's contents (normally nothing), and text held
// in a macro is scanned again each time the macro is expanded. Keeping the
// label out of the row macro avoids both problems.
file write tablefile "Contribution to \$R^2\$" _tab
local row = "& \textbf{" + strofreal(100*scalar(r2_cont_len_mis_q), "%4.0f") + "\%} & \textbf{" + strofreal(100*scalar(r2_cont_quan_mis_q), "%4.0f") + "\%}"
local row = `"`row'"' + char(9) + "& " + strofreal(100*scalar(r2_cont_len_time_q), "%4.0f") + "\% & " + strofreal(100*scalar(r2_cont_quan_time_q), "%4.0f") + "\% "
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(100*scalar(r2_cont_len_cost_q), "%4.0f") + "\%} & \textbf{" + strofreal(100*scalar(r2_cont_quan_cost_q), "%4.0f") + "\%}"
local row = `"`row'"' + char(9) + "& " + strofreal(100*scalar(r2_cont_len_imp_q), "%4.0f") + "\% & " + strofreal(100*scalar(r2_cont_quan_imp_q), "%4.0f") + "\% \\"
file write tablefile `"`row'"' _n

// --- Write Panel B headers and spacer ---
file write tablefile "\bottomrule" _n
file write tablefile " & & & & & & & & \\" _n
file write tablefile "\multicolumn{9}{c}{Panel B - length and potential} \\" _n
file write tablefile "" _n
file write tablefile "\toprule" _n
file write tablefile "" _tab "& \multicolumn{2}{c}{mistake}" _tab "& \multicolumn{2}{c}{\textbf{time}}" _tab "& \multicolumn{2}{c}{cost}" _tab "& \multicolumn{2}{c}{\textbf{importance}} \\" _n
file write tablefile "\midrule" _n
file write tablefile "Variable" _tab "& length & potential" _tab "& \textbf{length} & \textbf{potential}" _tab "& length & potential" _tab "& \textbf{length} & \textbf{potential} \\" _n

// Row 4: Standardized coeff.
local row = "Standardized coeff."
local row = `"`row'"' + char(9) + "& " + strofreal(scalar(std_coef_len_mis_p), "%4.2f") + " & " + strofreal(scalar(std_coef_pot_mis_p), "%4.2f") + " "
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(scalar(std_coef_len_time_p), "%4.2f") + "} & \textbf{" + strofreal(scalar(std_coef_pot_time_p), "%4.2f") + "}"
local row = `"`row'"' + char(9) + "& " + strofreal(scalar(std_coef_len_cost_p), "%4.2f") + " & " + strofreal(scalar(std_coef_pot_cost_p), "%4.2f") + " "
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(scalar(std_coef_len_imp_p), "%4.2f") + "} & \textbf{" + strofreal(scalar(std_coef_pot_imp_p), "%4.2f") + "} \\"
file write tablefile `"`row'"' _n

// Row 5: p-value
local row = "p-value"
local row = `"`row'"' + char(9) + "& " + strofreal(scalar(p_len_mis_p), "%4.2f") + " & " + strofreal(scalar(p_pot_mis_p), "%4.2f") + " "
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(scalar(p_len_time_p), "%4.2f") + "} & \textbf{" + strofreal(scalar(p_pot_time_p), "%4.2f") + "}"
local row = `"`row'"' + char(9) + "& " + strofreal(scalar(p_len_cost_p), "%4.2f") + " & " + strofreal(scalar(p_pot_cost_p), "%4.2f") + " "
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(scalar(p_len_imp_p), "%4.2f") + "} & \textbf{" + strofreal(scalar(p_pot_imp_p), "%4.2f") + "} \\"
file write tablefile `"`row'"' _n

// Row 6: Contribution to R2
// Label written directly with escaped dollar signs, for the same reason as Row 3.
file write tablefile "Contribution to \$R^2\$" _tab
local row = "& " + strofreal(100*scalar(r2_cont_len_mis_p), "%4.0f") + "\% & " + strofreal(100*scalar(r2_cont_pot_mis_p), "%4.0f") + "\% "
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(100*scalar(r2_cont_len_time_p), "%4.0f") + "\%} & \textbf{" + strofreal(100*scalar(r2_cont_pot_time_p), "%4.0f") + "\%}"
local row = `"`row'"' + char(9) + "& " + strofreal(100*scalar(r2_cont_len_cost_p), "%4.0f") + "\% & " + strofreal(100*scalar(r2_cont_pot_cost_p), "%4.0f") + "\% "
local row = `"`row'"' + char(9) + "& \textbf{" + strofreal(100*scalar(r2_cont_len_imp_p), "%4.0f") + "\%} & \textbf{" + strofreal(100*scalar(r2_cont_pot_imp_p), "%4.0f") + "\%} \\"
file write tablefile `"`row'"' _n

file write tablefile "\bottomrule" _n
file write tablefile "\end{tabular}" _n

file close tablefile

di ""
di "Success! ✅"
// Report the path the file was actually written to (relative to the root directory).
di "Automated Table 9 has been saved to `c(pwd)'/06_Other/Output/Table9_automated.tex"
