Skip to content
Snippets Groups Projects
Commit e8cdd735 authored by Ben Anderson
Browse files

amended AR code to only keep the coefficients/lags we calculated

parent 72d8be2d
No related branches found
No related tags found
No related merge requests found
...@@ -35,11 +35,8 @@ GNU General Public License for more details. ...@@ -35,11 +35,8 @@ GNU General Public License for more details.
global where "/Users/ben/Documents/Work" global where "/Users/ben/Documents/Work"
global proot "$where/Projects/ESRC-Transformative-Census2022" global proot "$where/Projects/ESRC-Transformative-Census2022"
* original files
global odfiles "$where/Data/Social Science Datatsets/CER Smart Metering Project"
* processed files * processed files
global pdfiles "$proot/data/cer/CER_OctHH_data" global pdfiles "$proot/data/CER-Irish-SM-Trial/CER_OctHH_data"
* results path * results path
global rpath "$proot/results/CER-Irish-SM-Trial" global rpath "$proot/results/CER-Irish-SM-Trial"
...@@ -47,6 +44,9 @@ global rpath "$proot/results/CER-Irish-SM-Trial" ...@@ -47,6 +44,9 @@ global rpath "$proot/results/CER-Irish-SM-Trial"
global version "v1" global version "v1"
* TO DO:
* get the graph labels to work correctly!
set more off set more off
clear all clear all
...@@ -59,16 +59,27 @@ timer clear ...@@ -59,16 +59,27 @@ timer clear
timer on 1 timer on 1
* set locals
local midweek "0 1" // 0 = weekends
local midweekt1 "Midweek"
local midweekt0 "Weekends"
* for testing - NB: the levelsof call that used to over-write this list is currently commented out, so only these IDs are processed
local ids "1002 1003 1004"
* control flow
local do_ar_all 0
local do_merge_all 1
***** *****
* use the pre-created October 2009 dataset if `do_ar_all' {
di "*-> do_ar_all = `do_ar_all' so running ID by ID AR calculations and merging process"
* use the pre-created Census2022 October 2009 dataset
* see https://github.com/dataknut/Census2022/blob/master/Census2022-CER-data-processing.do * see https://github.com/dataknut/Census2022/blob/master/Census2022-CER-data-processing.do
* load 1/2 hour data (don't need survey for this) * load 1/2 hour data (don't need survey for this)
use "$pdfiles/CER_Oct2009HH_30min_survey.dta", clear use "$pdfiles/CER_Oct2009HH_30min_survey.dta", clear
local midweek "0 1" // 0 = weekends
local midweekt1 "Midweek"
local midweekt0 "Weekends"
* we have 36 values 'per day' (not 48) * we have 36 values 'per day' (not 48)
* so lag 35 = this time tomorrow * so lag 35 = this time tomorrow
...@@ -77,7 +88,7 @@ local midweekt0 "Weekends" ...@@ -77,7 +88,7 @@ local midweekt0 "Weekends"
* so set max lags = 150 to cover both (will also mean 2 weekends) * so set max lags = 150 to cover both (will also mean 2 weekends)
local max_lag = 150 local max_lag = 150
local midweektl0 xline(35 70, lstyle(refline )) local midweektl0 xline(35 70, lstyle(refline ))
* the labels don't seem to show up? * the labels don't seem to show up where I expect them to
local midweektll0 text(35 0.5 "This time tomorrow", place(w)) text(70 0.5 "This time next weekend", place(e)) // weekend local midweektll0 text(35 0.5 "This time tomorrow", place(w)) text(70 0.5 "This time next weekend", place(e)) // weekend
local midweektl1 xline(35 70 105, lstyle(refline )) local midweektl1 xline(35 70 105, lstyle(refline ))
...@@ -94,10 +105,7 @@ gen s_hour = hh(s_halfhour) ...@@ -94,10 +105,7 @@ gen s_hour = hh(s_halfhour)
* this creates a big list of all IDs so we can loop over it - takes a while * this creates a big list of all IDs so we can loop over it - takes a while
* could just start from min & loop to max - but would then be testing for non-existent households * could just start from min & loop to max - but would then be testing for non-existent households
qui: levelsof ID, local(ids) *qui: levelsof ID, local(ids)
* for testing
*local ids "1002 1003 1004"
foreach m of local midweek { foreach m of local midweek {
di "****************" di "****************"
...@@ -119,8 +127,7 @@ foreach m of local midweek { ...@@ -119,8 +127,7 @@ foreach m of local midweek {
qui: su s_hour qui: su s_hour
di "Halfhours: `r(min)' - `r(max)' for ID: `id' (`midweekt`m'', N = `ba_nadults' adults, `nch' children, respondent: `econt`econ'')" di "Halfhours: `r(min)' - `r(max)' for ID: `id' (`midweekt`m'', N = `ba_nadults' adults, `nch' children, respondent: `econt`econ'')"
* this draws graph - slow * this draws graph - slow
* do not name the graph as STATA collects them in memory and eventually runs out * do NOT name the graph - STATA will keep them all and eventually run out of memory
* let them replace each other
qui: ac kwh, gen(archr) lags(`max_lag') /// qui: ac kwh, gen(archr) lags(`max_lag') ///
`midweektl`m'' /// draw lines `midweektl`m'' /// draw lines
`midweektll`m'' /// draw line labels `midweektll`m'' /// draw line labels
...@@ -128,30 +135,55 @@ foreach m of local midweek { ...@@ -128,30 +135,55 @@ foreach m of local midweek {
note("Halfhours: `r(min)' - `r(max)' for ID: `id' (`midweekt`m'', `ba_nadults' adult(s), `nch' children, respondent: `econt`econ'')") note("Halfhours: `r(min)' - `r(max)' for ID: `id' (`midweekt`m'', `ba_nadults' adult(s), `nch' children, respondent: `econt`econ'')")
graph export "$rpath/graphs/kwh_archr_hubid`id'_`midweekt`m''_$version.png", replace graph export "$rpath/graphs/kwh_archr_hubid`id'_`midweekt`m''_$version.png", replace
di "* save out arch results for household ID = `id'" di "* save out arch results for household ID = `id' keeping the lags we've calculated (should be `max_lag')"
qui: keep if archr != .
qui: keep lag archr ID qui: keep lag archr ID
qui: save "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta", replace qui: save "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta", replace
di "* Done" di "* Done"
restore restore
} }
} }
}
else {
di "*-> do_ar_all = `do_ar_all' so skipping ID by ID AR calculations"
}
clear clear
if `do_merge_all' {
* pool all the results * pool all the results
foreach m of local midweek { foreach m of local midweek {
di "****************" di "****************"
di "* Loading `midweekt`m'' files" di "* Loading `midweekt`m'' files"
foreach id of local ids { foreach id of local ids {
append using "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta" append using "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta"
erase "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta" *erase "$rpath/tmp/archr-`id'_`midweekt`m''_$version.dta"
} }
* distribution of archr values by lag for the first 105 lags * distribution of archr values by lag for the first 105 lags
graph box archr if lag <= 105, over(lag) graph box archr if lag <= 105, over(lag)
graph export "$rpath/graphs/box_mean_archr_all_hubids_`midweekt`m''_$version.png", replace graph export "$rpath/tmp/box_mean_archr_all_hubids_`midweekt`m''_$version.png", replace
save "$rpath/archr-all_hubids_`midweekt`m''_$version.dta", replace * tab ID
save "$rpath/tmp/archr-all_hubids_`midweekt`m''_$version.dta", replace
* save .csv file for R
outsheet using "$rpath/tmp/archr-all_hubids_`midweekt`m''_$version.csv", comma replace
}
}
else {
di "*-> do_merge_all = `do_merge_all' so skipping merging process"
} }
/*
* graph aggregated results
foreach m of local midweek {
di "****************"
di "* Loading `midweekt`m'' files"
use "$pdfiles/archr-all_hubids_`midweekt`m''_$version.dta", clear
* distribution of archr values by lag for the first 105 lags
graph box archr if lag <= 105, over(lag)
graph export "$rpath/graphs/box_mean_archr_all_hubids_`midweekt`m''_$version.png", replace
outsheet
}
*/
timer off 1 timer off 1
di "Time taken:" di "Time taken:"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment