// 02_DoddFrank_Act/ 

// Working directory for Windows users:
// cd %systemdrive%\Users\%username%\Downloads\Codes\02_DoddFrank_Act\
// Working directory for macOS users:
// cd ~/Downloads/Codes/02_DoddFrank_Act/

// Table OA.6 -- step 1: leave-one-out pools
//
// For every title i = 1..maxfiles, pool the rows of all OTHER titles
// (j != i), drop the per-title count variable, remove duplicate rows, and
// save the result as Source_completeness/other_cons-count_title_<i>.dta.
local maxfiles = 16
local filepath = "./Source_datasets/DFA-titles_processed/cons-count_title_"

// Make sure the output folder exists before anything is saved into it.
// -capture- swallows the error -mkdir- raises when the folder is already there.
capture mkdir "Source_completeness"

forvalues i = 1/`maxfiles' {

    // Scratch file that accumulates the pooled rows for this i
    tempfile combined_others

    // Stays 0 until the first title has been written to the scratch file
    local started = 0

    forvalues j = 1/`maxfiles' {

        // Title i is left out of its own comparison set
        if `j' == `i' {
            continue
        }

        // Load title j without its count variable
        use "`filepath'`j'.dta", clear
        drop count

        // From the second title onwards, stack the rows pooled so far
        // underneath the rows of title j
        if `started' {
            append using `combined_others'
        }

        // De-duplicate after every step so the scratch file never holds
        // repeated rows
        duplicates drop
        save `combined_others', replace
        local started = 1
    }

    // The data in memory are exactly the last (already de-duplicated)
    // scratch file, so no further -duplicates drop- is needed here.
    save "Source_completeness/other_cons-count_title_`i'.dta", replace
}


// Step 2: compute, per title, the fraction of rows whose key is also found
// in the pooled other-titles file.
//
// For every title the loop writes Source_completeness/frac_found-<title>.dta
// holding one row per recoded category plus one "all" row, with frac_found
// rounded to two decimals.
local maxfiles = 16
forvalues t = 1/`maxfiles' {

    // Distinct rows of this title, without the count variable
    use "./Source_datasets/DFA-titles_processed/cons-count_title_`t'.dta", clear
    drop count
    duplicates drop

    // Match on key against the other titles; rows that exist only in the
    // other-titles file are discarded, and matched is 1 for keys present
    // in both files, 0 otherwise
    merge 1:1 key using Source_completeness/other_cons-count_title_`t'.dta
    drop if _merge == 2
    gen matched = (_merge == 3)
    drop _merge
    save tmp_frac.dta, replace

    // --- overall fraction, pooling every category ---
    gen n_rows = _N
    egen n_hits = total(matched)
    gen frac_found = n_hits / n_rows
    replace category = "all"
    gen title = `t'
    keep category title frac_found
    duplicates drop
    save "Source_completeness/frac_found_allavg-`t'.dta", replace

    // --- fraction by category, after collapsing some categories ---
    use tmp_frac.dta, clear
    // attributes and economicoperands are both reported as operands
    replace category = "operands" if inlist(category, "attributes", "economicoperands")
    // functionwords and legalreferences are both reported as other
    replace category = "other" if inlist(category, "functionwords", "legalreferences")
    bysort category: gen n_rows = _N
    bysort category: egen n_hits = total(matched)
    gen frac_found = n_hits / n_rows
    keep category frac_found
    duplicates drop
    gen title = `t'

    // Add the "all" row, round, and store the per-title result
    append using "Source_completeness/frac_found_allavg-`t'.dta"
    replace frac_found = round(frac_found, 0.01)
    save Source_completeness/frac_found-`t'.dta, replace
}

// Step 3: stack the per-title result files and turn them into one row per
// title with one column per category.
local maxfiles = 16
use Source_completeness/frac_found-1.dta, clear
forvalues t = 2/`maxfiles' {
    append using Source_completeness/frac_found-`t'.dta
}

// (disabled) bysort category: egen average = mean(frac_found)

// Long layout (title x category) -> wide layout (one row per title)
sort title category
reshape wide frac_found, i(title) j(category) string

// Replace the frac_found<category> names created by reshape with the
// column names used in the table
rename (frac_foundall frac_foundoperands frac_foundlogicalconnectors frac_foundmathematicaloperators frac_foundother frac_foundregulatoryoperators) ///
       (All Operands LogicalConnectors MathematicalOperators Other RegulatoryOperators)
	
// Table OA.6
// Post summary statistics of the six fraction columns, grouped by title,
// and print them as a TeX table.
//   - estpost and esttab are not built-in Stata commands; they are expected
//     to come from the community-contributed estout package, which must be
//     installed beforehand.
//   - No statistics() option is passed, so tabstat's default statistic is
//     what gets posted. After the reshape earlier in this file the data hold
//     one row per title, so each posted value is that title's own fraction.
//   - In esttab, every column is labelled with the variable label of the
//     corresponding variable and formatted as %12.2fc (two decimals).
//   - Row labels are taken from e(labels); NOTE(review): presumably these
//     are the by-group labels stored by estpost -- confirm.
quietly estpost tabstat All Operands LogicalConnectors RegulatoryOperators MathematicalOperators Other, by(title) 
esttab, cells("All(label(`:var lab All') fmt(%12.2fc)) Operands(label(`:var lab Operands') fmt(%12.2fc)) LogicalConnectors(label(`:var lab LogicalConnectors') fmt(%12.2fc)) RegulatoryOperators(label(`:var lab RegulatoryOperators') fmt(%12.2fc)) MathematicalOperators(label(`:var lab MathematicalOperators') fmt(%12.2fc)) Other(label(`:var lab Other') fmt(%12.2fc))") ///
noobs nomtitle nonumber varlabels(`e(labels)') varwidth(20) tex

// Clean-up: remove the scratch file written while computing the fractions
erase tmp_frac.dta

// Also delete the 16 per-title input datasets.
// NOTE: these are the same files the earlier steps of this script read, so
// the script cannot be re-run afterwards unless they are regenerated.
foreach t of numlist 1/16 {
    erase ./Source_datasets/DFA-titles_processed/cons-count_title_`t'.dta
}