// All paths below are relative, so set the working directory to the Codes folder first.
// Windows users:
//cd %systemdrive%\Users\%username%\Downloads\Codes\
// macOS users:
//cd ~/Downloads/Codes/

// First, for EBA
//
use ./03_EBA/Datasets/EBA_Master.dta
gen source = "EBA"
rename id_name id

gen length = Attributes_tot + EconOp_tot + FctWords_tot + LegalRef_tot + LogicalConn_tot + MathOp_tot + RegOp_tot + Other_tot
gen cyclomatic = LogicalConn_tot
gen quantity = RegOp_tot
gen potential = 2 + EconOp_uniq + Attributes_uniq
gen level = potential / length 
gen diversity = LogicalConn_uniq + MathOp_uniq + RegOp_uniq

replace cyclomatic = cyclomatic/length
replace diversity = diversity/length
replace quantity = quantity/length

keep id source length cyclomatic quantity diversity level

save ./06_Other/Datasets/Histograms.dta, replace

// Second, for DFA
//
use ./02_DoddFrank_Act/Datasets/counts_titles.dta, clear

rename category_count c
rename category_unique_count uc

reshape wide c uc, i(title) j(category) string

rename cattributes Attributes_tot
rename ceconomicoperands EconOp_tot
rename cfunctionwords FctWords_tot
rename clegalreferences LegalRef_tot
rename clogicalconnectors LogicalConn_tot
rename cmathematicaloperators MathOp_tot
rename cother other_tot
rename cregulatoryoperators RegOp_tot

rename ucattributes Attributes_uniq
rename uceconomicoperands EconOp_uniq
rename ucfunctionwords FctWords_uniq
rename uclegalreferences LegalRef_uniq
rename uclogicalconnectors LogicalConn_uniq
rename ucmathematicaloperators MathOp_uniq
rename ucother other_uniq
rename ucregulatoryoperators RegOp_uniq

replace MathOp_tot = 0 if MathOp_tot == .  // to avoid issues with computing complexity measures
replace MathOp_uniq = 0 if MathOp_uniq == .

rename other_tot Other_tot
rename other_uniq Other_uniq 

rename title name_entity
gen id_name = group(name_entity)
keep id_name Attributes_uniq Attributes_tot EconOp_uniq EconOp_tot FctWords_uniq FctWords_tot LegalRef_uniq LegalRef_tot LogicalConn_uniq LogicalConn_tot MathOp_uniq MathOp_tot Other_uniq Other_tot RegOp_uniq RegOp_tot name_entity

gen length = Attributes_tot + EconOp_tot + FctWords_tot + LegalRef_tot + LogicalConn_tot + MathOp_tot + RegOp_tot + Other_tot
gen cyclomatic = LogicalConn_tot
gen quantity = RegOp_tot
gen potential = 2 + EconOp_uniq + Attributes_uniq
gen level = potential / length 
gen diversity = LogicalConn_uniq + MathOp_uniq + RegOp_uniq

replace cyclomatic = cyclomatic/length
replace diversity = diversity/length
replace quantity = quantity/length

gen source = "DFA"
rename id_name id 

keep id source length cyclomatic quantity diversity level

// now append EBA data for twoway histograms 
append using ./06_Other/Datasets/Histograms.dta

save ./06_Other/Datasets/Histograms.dta, replace


foreach var in length cyclomatic quantity diversity level {

    // First, determine a suitable range and width based on variable
    quietly summarize `var', detail
    local min = r(min)
    local max = r(max)

    // Define number of bins you want (e.g., 10 bins)
    local bins = 10
    local width = (`max' - `min')/`bins'

    twoway ///
        (histogram `var' if source == "DFA", fraction ///
            width(`width') start(`min') color(red%50) lwidth(medthin)) ///
        (histogram `var' if source == "EBA", fraction ///
            width(`width') start(`min') color(blue%50) lwidth(medthin)), ///
        legend(off) ///
        title("") ///
        xtitle("") ytitle("Fraction") ///
        yscale(range(0 1)) ylabel(0(0.1)1) ///
        graphregion(color(white))

    graph export "./06_Other/Output/Figure2_`var'.png", replace	
}
