// Set the working directory before running this do-file.
// Windows users:
//cd %systemdrive%\Users\%username%\Downloads\Codes\03_EBA\
// macOS users:
// cd ~/Downloads/Codes/03_EBA/

// Load the master dataset (path is relative to the current working directory).
use "Datasets/EBA_Master.dta", clear

// Retain only the identifiers, the unique/total word counts per category,
// the cell count and the three share variables.
keep id_name name_entity Cells_tot ///
    Attributes_uniq  Attributes_tot  ///
    EconOp_uniq      EconOp_tot      ///
    FctWords_uniq    FctWords_tot    ///
    LegalRef_uniq    LegalRef_tot    ///
    LogicalConn_uniq LogicalConn_tot ///
    MathOp_uniq      MathOp_tot      ///
    Other_uniq       Other_tot       ///
    RegOp_uniq       RegOp_tot       ///
    share_large share_medium share_snci

// length: sum of the total counts over all eight word categories.
gen length = Attributes_tot + EconOp_tot + FctWords_tot + LegalRef_tot ///
    + LogicalConn_tot + MathOp_tot + Other_tot + RegOp_tot

// Operands = attributes + economic operands;
// operators = logical connectors + mathematical + regulatory operators.
gen operands_unique  = Attributes_uniq + EconOp_uniq
gen operands_tot     = Attributes_tot + EconOp_tot
gen operators_unique = LogicalConn_uniq + MathOp_uniq + RegOp_uniq
gen operators_tot    = LogicalConn_tot + MathOp_tot + RegOp_tot

// Give the share variables a common stub with a numeric suffix so that
// -reshape long- can stack them (1 = large, 2 = medium, 3 = snci).
rename (share_large share_medium share_snci) (share1 share2 share3)

// Variables that are constant within id_name and identify a row in wide form.
local idvars id_name length operands_unique operands_tot operators_unique ///
    operators_tot RegOp_tot LogicalConn_tot Cells_tot

keep share1 share2 share3 `idvars'

// Wide -> long: one row per (id_name, share_type), share_type taking 1, 2, 3.
reshape long share, i(`idvars') j(share_type)
	
// Measures used as regressors below, built from the word counts.
gen potential  = 2 + operands_unique     // unique operands plus 2
gen diversity  = operators_unique        // number of unique operators
gen level      = potential / length      // potential relative to length
gen quantity   = RegOp_tot               // total count of regulatory operators
gen cyclomatic = LogicalConn_tot         // total count of logical connectors

// Indicator variables for the three share types (share_1 is the omitted
// category in the regressions that include share_2 and share_3).
forvalues g = 1/3 {
    gen share_`g' = (share_type == `g')
}

// 1. Correlation of the different measures with share, controlling for length.
// Column 1 is the length-only baseline; columns 2-6 add one measure each.
// After every regression the fitted values are saved as share_<measure>.

local measures length cyclomatic quantity potential diversity level
local col = 0
foreach m of local measures {
    local ++col
    // Regressors: length plus the measure (only one copy of length for the baseline).
    local rhs length `m'
    local rhs : list uniq rhs
    quietly regress share share_2 share_3 `rhs', vce(robust)
    predict share_`m'
    estimates store regr1_`col'
}

// Table 7
estout regr1_1 regr1_2 regr1_3 regr1_4 regr1_5 regr1_6, ///
    cells(b(star fmt(5)) t(par fmt(2))) ///
    stats(r2_a, fmt(3) labels(Adjusted-R2)) ///
    style(tex) ///
    keep(length cyclomatic quantity potential diversity level) ///
    starlevels(* 0.10 ** 0.05 *** 0.01) ///
    legend label collabels(none) type

// Additional: check whether the fitted values lie between 0 and 1.
// Authors' note: for two regressions template C77 has a predicted value of 1.01;
// all other predicted values are below 1 and above 0.
summarize share_length share_cyclomatic share_quantity share_potential share_diversity share_level

// ONLINE APPENDIX: fractional probit regressions.
// Same six specifications as the length-controlled OLS table, estimated with
// -fracreg probit- instead of -regress-.

local measures length cyclomatic quantity potential diversity level
local col = 0
foreach m of local measures {
    local ++col
    // Regressors: length plus the measure (only one copy of length for the baseline).
    local rhs length `m'
    local rhs : list uniq rhs
    quietly fracreg probit share share_2 share_3 `rhs'
    estimates store regr2_`col'
}

// Table OA.23
estout regr2_1 regr2_2 regr2_3 regr2_4 regr2_5 regr2_6, ///
    cells(b(star fmt(5)) t(par fmt(2))) ///
    stats(r2_p, fmt(3) labels(Pseudo-R2)) ///
    style(tex) ///
    keep(length cyclomatic quantity potential diversity level) ///
    starlevels(* 0.10 ** 0.05 *** 0.01) ///
    legend label collabels(none) type


// ONLINE APPENDIX: Correlation of different counts of operands and operators with share.

// Total number of unique words = unique operands + unique operators.
gen words_unique = operands_unique + operators_unique

// One local per column of the table; each holds the regressors added to the
// share-type dummies.
local spec1 length
local spec2 operands_tot operators_tot
local spec3 words_unique
local spec4 operands_unique operators_unique
local spec5 length operands_unique operators_unique

forvalues s = 1/5 {
    quietly regress share share_2 share_3 `spec`s'', vce(robust)
    estimates store regr3_`s'
}

// Table OA.24
estout regr3_1 regr3_2 regr3_3 regr3_4 regr3_5, ///
    cells(b(star fmt(5)) t(par fmt(2))) ///
    stats(r2_a, fmt(3) labels(Adjusted-R2)) ///
    style(tex) ///
    keep(length words_unique operands_tot operators_tot operands_unique operators_unique) ///
    starlevels(* 0.10 ** 0.05 *** 0.01) ///
    legend label collabels(none) type


// ONLINE APPENDIX: Correlation of different measures with share, not controlling for length.
// Each column regresses share on the share-type dummies and a single measure.

local col = 0
foreach m in length cyclomatic quantity potential diversity level {
    local ++col
    quietly regress share share_2 share_3 `m', vce(robust)
    estimates store regr4_`col'
}

// Table OA.25
estout regr4_1 regr4_2 regr4_3 regr4_4 regr4_5 regr4_6, ///
    cells(b(star fmt(5)) t(par fmt(2))) ///
    stats(r2_a, fmt(3) labels(Adjusted-R2)) ///
    style(tex) ///
    keep(length cyclomatic quantity potential diversity level) ///
    starlevels(* 0.10 ** 0.05 *** 0.01) ///
    legend label collabels(none) type


// ONLINE APPENDIX: Interaction effects
// Interact length, quantity and potential with each share-type dummy, giving
// <measure>1, <measure>2, <measure>3 (the measure within share type 1, 2, 3).

foreach v in length quantity potential {
    forvalues g = 1/3 {
        gen `v'`g' = `v' * share_`g'
    }
}

// Regressor blocks shared by the three columns.
local len_terms length1 length2 length3
local qty_terms quantity1 quantity2 quantity3
local pot_terms potential1 potential2 potential3

quietly regress share share_2 share_3 `len_terms', vce(robust)
estimates store regr5_1
quietly regress share share_2 share_3 `len_terms' `qty_terms', vce(robust)
estimates store regr5_2
quietly regress share share_2 share_3 `len_terms' `pot_terms', vce(robust)
estimates store regr5_3

// Table OA.26
estout regr5_1 regr5_2 regr5_3, ///
    cells(b(star fmt(5)) t(par fmt(2))) ///
    stats(r2_a, fmt(3) labels(Adjusted-R2)) ///
    style(tex) ///
    keep(length1 length2 length3 quantity1 quantity2 quantity3 potential1 potential2 potential3) ///
    starlevels(* 0.10 ** 0.05 *** 0.01) ///
    legend label collabels(none) type

// Additional: average share by group (one summary per share-type dummy).
forvalues g = 1/3 {
    summarize share if share_`g' == 1
}


// Additional: robustness to removing large values of quantity.
// Column 1 uses the full sample; columns 2-7 re-estimate the same model on
// successively smaller subsamples with quantity strictly below a cutoff.
// NOTE(review): the cutoffs look like values read off the detailed summary
// below (possibly percentiles) - confirm against the data before changing them.
summarize quantity, detail

quietly regress share share_2 share_3 length quantity, vce(robust)
estimates store regra_1

// The -if- qualifier is placed before the comma, following Stata's documented
// syntax: command varlist [if] [, options].
local col = 1
foreach cutoff in 8346 3986 2567 968 439 164 {
    local ++col
    quietly regress share share_2 share_3 length quantity if quantity < `cutoff', vce(robust)
    estimates store regra_`col'
}

// Table Additional
estout regra_1 regra_2 regra_3 regra_4 regra_5 regra_6 regra_7, ///
    cells(b(star fmt(5)) t(par fmt(2))) ///
    stats(r2_a, fmt(3) labels(Adjusted-R2)) ///
    style(tex) ///
    keep(length quantity) ///
    starlevels(* 0.10 ** 0.05 *** 0.01) ///
    legend label collabels(none) type


// Additional: improving the fit with a log specification and potential.
// ln() returns missing for non-positive arguments, so any observation with
// length or quantity equal to zero drops out of the log specifications.
gen ln_length   = ln(length)
gen ln_quantity = ln(quantity)

// One local per column: regressors added to the share-type dummies.
local spec1 length
local spec2 quantity
local spec3 ln_length
local spec4 ln_quantity
local spec5 length quantity
local spec6 ln_length ln_quantity
local spec7 ln_length ln_quantity potential

forvalues s = 1/7 {
    quietly regress share share_2 share_3 `spec`s'', vce(robust)
    estimates store regrb_`s'
}

// Table Additional
estout regrb_1 regrb_2 regrb_3 regrb_4 regrb_5 regrb_6 regrb_7, ///
    cells(b(star fmt(5)) t(par fmt(2))) ///
    stats(r2_a, fmt(3) labels(Adjusted-R2)) ///
    style(tex) ///
    keep(length quantity ln_length ln_quantity potential) ///
    starlevels(* 0.10 ** 0.05 *** 0.01) ///
    legend label collabels(none) type


// Additional: both cells and other regulatory operators are significant.
// Split quantity into the cell count and the remainder, and define length
// net of quantity.
gen quantity_ex_cells  = quantity - Cells_tot
gen length_ex_quantity = length - quantity

// One local per column: regressors added to the share-type dummies.
local spec1 length
local spec2 length Cells_tot
local spec3 length Cells_tot quantity_ex_cells
local spec4 length_ex_quantity Cells_tot quantity_ex_cells

forvalues s = 1/4 {
    quietly regress share share_2 share_3 `spec`s'', vce(robust)
    estimates store regrc_`s'
}

// Table Additional
estout regrc_1 regrc_2 regrc_3 regrc_4, ///
    cells(b(star fmt(5)) t(par fmt(2))) ///
    stats(r2_a, fmt(3) labels(Adjusted-R2)) ///
    style(tex) ///
    keep(length length_ex_quantity quantity_ex_cells Cells_tot) ///
    starlevels(* 0.10 ** 0.05 *** 0.01) ///
    legend label collabels(none) type
