diff --git a/Census2022-CER-calculate-AR.do b/Census2022-CER-calculate-AR.do index aaef4ac93d1475d2919869760ea52f423b609622..7fd836286a9a7bf50fb7f0099e681f5af7286960 100644 --- a/Census2022-CER-calculate-AR.do +++ b/Census2022-CER-calculate-AR.do @@ -35,11 +35,8 @@ GNU General Public License for more details. global where "/Users/ben/Documents/Work" global proot "$where/Projects/ESRC-Transformative-Census2022" -* original files -global odfiles "$where/Data/Social Science Datatsets/CER Smart Metering Project" - * processed files -global pdfiles "$proot/data/cer/CER_OctHH_data" +global pdfiles "$proot/data/CER-Irish-SM-Trial/CER_OctHH_data" * results path global rpath "$proot/results/CER-Irish-SM-Trial" @@ -47,6 +44,9 @@ global rpath "$proot/results/CER-Irish-SM-Trial" global version "v1" +* TO DO: +* get the graphs labels to work correctly! + set more off clear all @@ -59,99 +59,131 @@ timer clear timer on 1 -***** -* use the pre-created October 2009 dataset -* see https://github.com/dataknut/Census2022/blob/master/Census2022-CER-data-processing.do - -* load 1/2 hour data (don't need survey for this) -use "$pdfiles/CER_Oct2009HH_30min_survey.dta", clear - +* set locals local midweek "0 1" // 0 = weekends local midweekt1 "Midweek" local midweekt0 "Weekends" -* we have 36 values 'per day' (not 48) -* so lag 35 = this time tomorrow -* for mid-week, this time next week = lag 105 -* for weekend, this time next week = lag 70 -* so set max lags = 150 to cover both (will also mean 2 weekends) -local max_lag = 150 -local midweektl0 xline(35 70, lstyle(refline )) -* the labels don't seem to show up? -local midweektll0 text(35 0.5 "This time tomorrow", place(w)) text(70 0.5 "This time next weekend", place(e)) // weekend +* for testing - gets over-written if do_ar_all run +local ids "1002 1003 1004" -local midweektl1 xline(35 70 105, lstyle(refline )) -* the labels don't seem to show up? -local midweektll1 text(35 0 "This time tomorrow", place(w)) text(70 0 "This time the day after tomorrow", place(w)) text(105 0 "This time next week", place(w)) // midweek +* control flow +local do_ar_all 0 +local do_merge_all 1 -local econt1 "In work" -local econt2 "Unemployed" -local econt3 "Retired" -local econt4 "Caring for relative or family" - -* filter for -gen s_hour = hh(s_halfhour) - -* this creates a big list of all IDs so we can loop over it - takes a while -* could just start from min & loop to max - but would then be testing for non-existent households -qui: levelsof ID, local(ids) - -* for testing -*local ids "1002 1003 1004" - -foreach m of local midweek { - di "****************" - di "* Calculating ARs for midweek = `m' (`midweekt`m'') " - foreach id of local ids { - di "* -> ID = `id'" - - preserve - * take out 00:00 - 06:00 as repeat cycle of sleep will swamp other cycles - keep if ID == `id' & midweek == `m' & s_hour >= 6 - * need to fool ac into thinking these observations are continuous - qui: su ID - qui: egen lag = fill(1(1)`r(N)') - tsset lag - * do this here so local is set to the unique valuye for this hh - qui: levelsof ba_nchildren, local(nch) - qui: levelsof ba_nadults, local(ba_nadults) - qui: levelsof ba_empl, local(econ) - qui: su s_hour - di "Halfhours: `r(min)' - `r(max)' for ID: `id' (`midweekt`m'', N = `ba_nadults' adults, `nch' children, respondent: `econt`econ'')" - * this draws graph - slow - * do not name the graph as STATA collects them in memory and eventually runs out - * let them replace each other - qui: ac kwh, gen(archr) lags(`max_lag') /// - `midweektl`m'' /// draw lines - `midweektll`m'' /// draw line labels - xlabel(0(35)140) /// - note("Halfhours: `r(min)' - `r(max)' for ID: `id' (`midweekt`m'', `ba_nadults' adult(s), `nch' children, respondent: `econt`econ'')") - graph export "$rpath/graphs/kwh_archr_hubid`id'_`midweekt`m''_$version.png", replace - - di "* save out arch results for household ID = `id'" - qui: keep lag archr ID - qui: save "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta", replace - di "* Done" - restore +***** +if `do_ar_all' { + di "*-> do_ar_all = `do_ar_all' so running ID by ID AR calculations and merging process" + * use the pre-created Census2022 October 2009 dataset + * see https://github.com/dataknut/Census2022/blob/master/Census2022-CER-data-processing.do + + * load 1/2 hour data (don't need survey for this) + use "$pdfiles/CER_Oct2009HH_30min_survey.dta", clear + + + * we have 36 values 'per day' (not 48) + * so lag 35 = this time tomorrow + * for mid-week, this time next week = lag 105 + * for weekend, this time next week = lag 70 + * so set max lags = 150 to cover both (will also mean 2 weekends) + local max_lag = 150 + local midweektl0 xline(35 70, lstyle(refline )) + * the labels don't seem to show up where I expect them to + local midweektll0 text(35 0.5 "This time tomorrow", place(w)) text(70 0.5 "This time next weekend", place(e)) // weekend + + local midweektl1 xline(35 70 105, lstyle(refline )) + * the labels don't seem to show up? + local midweektll1 text(35 0 "This time tomorrow", place(w)) text(70 0 "This time the day after tomorrow", place(w)) text(105 0 "This time next week", place(w)) // midweek + + local econt1 "In work" + local econt2 "Unemployed" + local econt3 "Retired" + local econt4 "Caring for relative or family" + + * filter for + gen s_hour = hh(s_halfhour) + + * this creates a big list of all IDs so we can loop over it - takes a while + * could just start from min & loop to max - but would then be testing for non-existent households + *qui: levelsof ID, local(ids) + + foreach m of local midweek { + di "****************" + di "* Calculating ARs for midweek = `m' (`midweekt`m'') " + foreach id of local ids { + di "* -> ID = `id'" + + preserve + * take out 00:00 - 06:00 as repeat cycle of sleep will swamp other cycles + keep if ID == `id' & midweek == `m' & s_hour >= 6 + * need to fool ac into thinking these observations are continuous + qui: su ID + qui: egen lag = fill(1(1)`r(N)') + tsset lag + * do this here so local is set to the unique valuye for this hh + qui: levelsof ba_nchildren, local(nch) + qui: levelsof ba_nadults, local(ba_nadults) + qui: levelsof ba_empl, local(econ) + qui: su s_hour + di "Halfhours: `r(min)' - `r(max)' for ID: `id' (`midweekt`m'', N = `ba_nadults' adults, `nch' children, respondent: `econt`econ'')" + * this draws graph - slow + * do NOT name the graph - STATA will keep them all and eventually run out of memory + qui: ac kwh, gen(archr) lags(`max_lag') /// + `midweektl`m'' /// draw lines + `midweektll`m'' /// draw line labels + xlabel(0(35)140) /// + note("Halfhours: `r(min)' - `r(max)' for ID: `id' (`midweekt`m'', `ba_nadults' adult(s), `nch' children, respondent: `econt`econ'')") + graph export "$rpath/graphs/kwh_archr_hubid`id'_`midweekt`m''_$version.png", replace + + di "* save out arch results for household ID = `id' keeping the lags we've calculated (should be `max_lag')" + qui: keep if archr != . + qui: keep lag archr ID + qui: save "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta", replace + di "* Done" + restore + } } } +else { + di "*-> do_ar_all = `do_ar_all' so skipping ID by ID AR calculations" +} clear -* pool all the results +if `do_merge_all' { + * pool all the results + foreach m of local midweek { + di "****************" + di "* Loading `midweekt`m'' files" + foreach id of local ids { + append using "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta" + *erase "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta" + } + * distribution of average values for first 70? + graph box archr if lag <= 105, over(lag) + graph export "$rpath/tmp/box_mean_archr_all_hubids_`midweekt`m''_$version.png", replace + * tab ID + save "$rpath/tmp/archr-all_hubids_`midweekt`m''_$version.dta", replace + * save .csv file for R + outsheet using "$rpath/tmp/archr-all_hubids_`midweekt`m''_$version.csv", comma replace + } +} +else { + di "*-> do_merge_all = `do_merge_all' so skipping merging process" +} + +/* +* graph aggregated results foreach m of local midweek { di "****************" di "* Loading `midweekt`m'' files" - foreach id of local ids { - append using "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta" - erase "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta" - } + use "$pdfiles/archr-all_hubids_`midweekt`m''_$version.dta", clear * distribution of average values for first 70? graph box archr if lag <= 105, over(lag) graph export "$rpath/graphs/box_mean_archr_all_hubids_`midweekt`m''_$version.png", replace - save "$rpath/archr-all_hubids_`midweekt`m''_$version.dta", replace + outsheet } - +*/ timer off 1 di "Time taken:"