Hello everyone,

I am at my first attempt at importing, cleaning and appending a large amount of datasets. I am working with yearly (2004-2014) balance sheet data for 8000 Italian municipalities. Each Comune is divided in year and each year in "quadri". I need to create one file per comune with all years (and will eventually append them all in a single panel dataset). I have started with a very inefficient method: the only one I am able to use. This implies four big loops (importing, saving yearly files per each municipality, cleaning, appending) and saving lots of datasets. Could you help me making the process more efficient? I will probably need to run the code more than once and my approach is taking forever to work. Also, I tried to work with -tempfile- but had issues using it in combination with locals (in the filenames).

This is how my code is structured:
Code:
clear all
set more off

cd "C:\bilanci_unzip"
qui folders

local com_list `"`r(folders)'"'

*IMPORTING LOOP
set more off
log using "C:/Users/Fabio M/Documents/Research/Data/log_BF_import", text replace
local years 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
local quadro 01 02 03 13
foreach c of local com_list{
foreach y of local years{
foreach q of local quadro{
capture noisily cd "C:/bilanci_unzip/`c'/csv/`c'/`y'/Consuntivo/`q'"
capture noisily fs *
 foreach f in `r(files)'{
    local F : subinstr local f ".csv" ""
    capture noisily import delimited using "C:/bilanci_unzip/`c'/csv/`c'/`y'/Consuntivo/`q'/`f'", delimiter(";") varnames(1) clear

[...]

    capture noisily cd "C:/bilanci_unzip/`c'/csv/`c'/`y'"
    capture noisily save `"`a'_`y'_`q'_`F'"', replace
    }
}
}
}
log close


*CREATE YEARLY DATASETS PER EACH COMUNE
clear
set more off
log using "C:/Users/Fabio M/Documents/Research/Data/log_BF_yearly", text replace

local years 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
foreach c of local com_list{
foreach y of local years{
        clear
        cd "C:/bilanci_unzip/`c'/csv/`c'/`y'"
        fs *
foreach f in `r(files)'{
            append using `"`f'"'
            *this is for having all the right info in labels3
            replace labels3 = labels2 if labels3==""
            }   //close 'files' loop
    replace voci = servizioeattivitarilevanti if voci=="" & servizioeattivitarilevanti!=""

    set obs `=_N+1'
    replace voci="year" if voci==""
    replace dati=`y' if voci=="year"

    *save one file for each comune
    cd "C:/bilanci_unzip/`c'/csv/`c'/"
    save "`c'_`y'", replace
        }  //close 'year' loop
}  //close 'comune' loop
log close

*CLEANING LOOP
clear
//local com_list `"abano-terme--2050540010"'
log using "C:/Users/Fabio M/Documents/Research/Data/log_BF_clean", text replace
foreach c of local com_list{
cd "C:/bilanci_unzip/`c'/csv/`c'"
fs *
foreach f in `r(files)'{
    use `"`f'"', clear
[...]
    keep voci dati
[...]
    sxpose, clear
    renvars, map(strtoname(@[1]))
    drop in 1
[...]
    save, replace
}
}    
log close

*APPENDING LOOP
log using "C:/Users/Fabio M/Documents/Research/Data/log_BF_append", text replace
foreach c of local com_list{
clear
cd "C:/bilanci_unzip/`c'/csv/`c'"
fs *
foreach f in `r(files)'{
            append using `"`f'"'
            }
[...]
cd "C:/bilanci_unzip/`c'/csv/"
save `"`c'"', replace            
}
log close
Thank you for your attention,

Fabio

PS:

This is the complete code, for reference:
Code:
********************************************************************************
*************************dati bilanci cleaning**********************************
********************************************************************************
clear all
set more off

cd "C:\bilanci_unzip"
qui folders

local com_list `"`r(folders)'"'

*IMPORTING LOOP
set more off
log using "C:/Users/Fabio M/Documents/Research/Data/log_BF_import", text replace
//local com_list `"abano-terme--2050540010"'
local years 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
local quadro 01 02 03 13
foreach c of local com_list{
foreach y of local years{
foreach q of local quadro{
capture noisily cd "C:/bilanci_unzip/`c'/csv/`c'/`y'/Consuntivo/`q'"
capture noisily fs *
 foreach f in `r(files)'{
    local F : subinstr local f ".csv" ""
    capture noisily import delimited using "C:/bilanci_unzip/`c'/csv/`c'/`y'/Consuntivo/`q'/`f'", delimiter(";") varnames(1) clear
    
    *useful for knowing what data refer to
    capture noisily gen labels = "`F'"
    capture noisily split labels, parse(dicembre -i- -ii- -iii- -iv- -v- -vi- -1- -2- -3- -4- -5- -6- -7- -8- -9- -10- -11- -12- -13- -14- -15- -101- -102- -103-)
    
    *rename variables that change name
    cap rename descrizionevoci voci
    cap rename importooquantitã parametrodiefficaciaefficienzafo
    
    capture noisily gen comune = "`c'`y'`q'"
    capture noisily replace comune = subinstr(comune,"-"," ", 99999)
    capture noisily split comune, parse("  ")
    capture noisily drop comune comune2
    capture noisily rename comune1 comune
    
    *destring conditioning on varibale being of type 'string'
    capture confirm string var dati
        if _rc==0 {
        replace dati="1" if dati=="S"
        replace dati="0" if dati=="N"
        replace dati="." if dati=="N.C."
        replace dati = subinstr(dati,".","", 99999)
        replace dati = subinstr(dati,",",".", 99999)
        destring dati, replace    
        }

    *tostring parametrodiefficaciaefficienzafo if it is numeric [_rc==7]
    cap confirm string var parametrodiefficaciaefficienzafo
        if _rc==7 {
            gen param = strofreal(parametrodiefficaciaefficienzafo, "%10.5g")
            drop parametrodiefficaciaefficienzafo
            rename param parametrodiefficaciaefficienzafo
            }

    *replace "N.C" with missing value "."
    capture confirm string var parametrodiefficaciaefficienzafo
        if _rc==0 {
        replace parametrodiefficaciaefficienzafo="." if parametrodiefficaciaefficienzafo=="N.C."    
        }
        
    //tempfile t_`"`a'_`y'_`q'"'
    capture noisily cd "C:/bilanci_unzip/`c'/csv/`c'/`y'"
    capture noisily save `"`a'_`y'_`q'_`F'"', replace
    //save `"`a'`y'`q'"', replace
    
    //tempfile `c'`y'`q'
    //save "C:/bilanci_unzip/`c'/``c'`y'`q''.dta"
    }
}
}
}
log close

//browse

*CREATE YEARLY DATASETS PER EACH COMUNE
clear
set more off
log using "C:/Users/Fabio M/Documents/Research/Data/log_BF_yearly", text replace
//local com_list `"abano-terme--2050540010"'
local years 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
foreach c of local com_list{
foreach y of local years{
        clear
        cd "C:/bilanci_unzip/`c'/csv/`c'/`y'"
        fs *
foreach f in `r(files)'{
            append using `"`f'"'
            *this is for having all the right info in labels3
            replace labels3 = labels2 if labels3==""
            }   //close 'files' loop
    replace voci = servizioeattivitarilevanti if voci=="" & servizioeattivitarilevanti!=""

    set obs `=_N+1'
    replace voci="year" if voci==""
    replace dati=`y' if voci=="year"

    *save one file for each comune
    cd "C:/bilanci_unzip/`c'/csv/`c'/"
    save "`c'_`y'", replace
        }  //close 'year' loop
}  //close 'comune' loop
log close

*CLEANING LOOP
clear
//local com_list `"abano-terme--2050540010"'
log using "C:/Users/Fabio M/Documents/Research/Data/log_BF_clean", text replace
foreach c of local com_list{
cd "C:/bilanci_unzip/`c'/csv/`c'"
fs *
foreach f in `r(files)'{
    use `"`f'"', clear
    *destring
    local numeric accertamenti impegni parametrodiefficaciaefficienzafo
    foreach n of local numeric{
        capture confirm string var `n'
        if _rc==0 {    
        replace `n' = subinstr(`n',".","", 99999)
        replace `n' = subinstr(`n',",",".", 99999)
        destring `n', replace
        }
        }
    local useful accertamenti impegni parametrodiefficaciaefficienzafo
    foreach u of local useful{
        replace dati = `u' if dati ==. & `u'!=.
        }
    replace dati = parametrodiefficaciaefficienzafo if dati==. & parametrodiefficaciaefficienzafo!=.
    *keep only useful variables
    //keep comune voci funzionieserviziinterventicorren dati accertamenti impegni impegni_accert
    drop riscoss* pagam* servizioe*
    //drop if impegni_accert==.
    rename parametrodiefficaciaefficienzafo efficaciaefficienza
    
    *drop useless data
    gen useless = strpos("servizi-indispensabili-per-comuni-ed-unioni-di-comuni",labels2) & (labels3!="acquedotto" & labels3!="fognatura-e-depurazione" & labels3!="nettezza-urbana" & labels3!="viabilita-ed-illuminazione-pubblica")
    drop if useless==1 & voci!="year"
    drop if voci=="Forma di gestione (codice)" | voci=="Unità immobiliari servite" | voci=="Totale unità immobiliari" | voci=="N.unità immobiliari servite" ///
    | voci=="Terreni Agricoli" | voci=="Aree Fabbricabili" | voci=="N. personale docente" | voci=="Costo totale diretto"  | voci=="N. personale non docente"  ///
    | voci=="N. personale amministrativo"  | voci=="N.studenti iscritti al 30.09" | voci=="N. aule disponibili al 30.09" | voci=="Forma di gestione" ///
    | voci=="Contributo compensativo minor gettito imu derivante da modifiche normative (art. 1 e 3 D.L. 102/13)"


    *rename voci which are identical
    replace voci="costo acquedotto" if voci=="Costo totale diretto" & labels3=="acquedotto"
    replace voci="costo fogna" if voci=="Costo totale diretto" & labels3=="fognatura-e-depurazione"
    replace voci="costo nettezza" if voci=="Costo totale diretto" & labels3=="nettezza-urbana"
    replace voci="costo viabilita" if voci=="Costo totale diretto" & labels3=="viabilita-ed-illuminazione-pubblica"

    *drop all useless duplicate voci
    drop if labels=="quadro-1-si-certifica" //to avoid 'year' to be  cancelled as duplicate in years 2013, 2014
    duplicates tag voci, gen(dup)
    drop if dup!=0
    drop dup
    
    keep voci dati
    gen lower = lower(voci)
    drop voci
    rename lower voci
    
    drop if voci=="di cui : abitazione principale" | voci=="altri fabbricati"

    *make everything string, transpose and destring everything again
    //tostring dati, gen(dat) format("%9.3f")
    //drop dat
    gen dat = strofreal(dati, "%10.5g")
    drop dati
    sxpose, clear
    renvars, map(strtoname(@[1]))
    drop in 1

    *rename all variables with lowercase
    *rename *, lower
    
    save, replace
}
}    
log close

*APPENDING LOOP
//local com_list `"abano-terme--2050540010"'
log using "C:/Users/Fabio M/Documents/Research/Data/log_BF_append", text replace
foreach c of local com_list{
clear
cd "C:/bilanci_unzip/`c'/csv/`c'"
fs *
foreach f in `r(files)'{
            append using `"`f'"'
            }
            
*comune variable
gen comune = "`c'"
replace comune = subinstr(comune,"-"," ", 99999)
split comune, parse("  ")
drop comune
rename comune2 cod_elettor
rename comune1 comune    

*tidy var's order    
order comune, first    
order year, after(comune)
order cod_elettor, after(comune)

*destring all
destring *, replace

*fix cod_elettor's format
format %12.0g cod_elettor

cd "C:/bilanci_unzip/`c'/csv/"
save `"`c'"', replace            
}
log close