diff --git a/NEED/analyse-NEED-EULF-2014-descriptives.do b/NEED/analyse-NEED-EULF-2014-descriptives.do index 639724a255b803df7bd063ab59ce19fcca089e64..6864d6adcd11d216ddf31b8ab83f68874fd4f998 100644 --- a/NEED/analyse-NEED-EULF-2014-descriptives.do +++ b/NEED/analyse-NEED-EULF-2014-descriptives.do @@ -28,11 +28,14 @@ local rpath "`proot'/results/NEED" local version "v1.1" * quick tests for 2012 -local do_2012_desc = 0 +local do_2012_desc = 1 * tests for all years using long file - takes a while local do_long_desc = 1 +* toggle graph drawing (1 = draw and export histograms/box plots, 0 = skip them) +local do_graphs = 0 + set more off log using "`rpath'/analyse-NEED-EULF-2014-descriptives-`version'-$S_DATE.smcl", replace @@ -48,13 +51,26 @@ if `do_2012_desc' { * processor intensive local vars "Econs2012 Gcons2012" local tvars "EE_BAND FLOOR_AREA_BAND PROP_AGE" + * test values for valid - check for valid 0s for example. This only happens for gas where: + * gas cons > 100 (so included) but cons < 250 so 'rounded' to nearest 500 = 0 + + * elec always rounded to nearest 50 so min should always be 100 + foreach v of local vars { + tabstat `v', by(`v'Valid) s(n mean semean min max) foreach tv of local tvars { - histogram `v' , by(`tv') name(h_`tv'_`v'_`sample') - graph export "`rpath'/NEED-EULF-2014-histo_`v'_by_`tv'_`sample'.png", replace + di "***************" + di "* Testing `tv'" + * test values for `tv' - check for 0s for example + tabstat `v' if `v'Valid == "V", by(`tv') s(n mean semean min max) + tab `v' if `v' < 1000 + if `do_graphs' { + histogram `v' if `v'Valid == "V", by(`tv') name(h_`tv'_`v'_`sample') + graph export "`rpath'/NEED-EULF-2014-histo_`v'_by_`tv'_`sample'_valid.png", replace - graph box `v', over(`tv') name(b_`tv'_`v'_`sample') - graph export "`rpath'/NEED-EULF-2014-box_`v'_by_`tv'_`sample'.png", replace + graph box `v' if `v'Valid == "V", over(`tv') name(b_`tv'_`v'_`sample') + graph export "`rpath'/NEED-EULF-2014-box_`v'_by_`tv'_`sample'_valid.png", replace + } } } } @@ -62,7 +78,7 @@ if `do_2012_desc' { 
if `do_long_desc' { * Now use the pre-processed long form file which contains all years of consumption data but not the constant values (housing charactersitics etc) which are in the xwave file * do this for each random sample of differening sizes as a check - local samples "10 20 130 40 50 100" + local samples "10 20 30 40 50 100" foreach s of local samples { di "************************" di "* Using `s'% sample" @@ -71,23 +87,33 @@ if `do_long_desc' { * set as panel in case it wasn't xtset HH_ID year + * examine panel status xtdescribe - - xtsum Econs Gcons - - * summarise Electricity - table EconsValid year, c(count Econs min Econs mean Econs max Econs) - * summarise Gas - table GconsValid year, c(count Gcons min Gcons mean Gcons max Gcons) - - * distributions by year + + * distributions for valid obs local vars "Econs Gcons" foreach v of local vars { - histogram `v', by(year) name(histo_`s'pc_`v') - graph export "`rpath'/NEED-EULF-2014-`s'pc-histo_`v'_by_year.png", replace - graph box `v', over(year) name(box_`s'pc_`v') - graph export "`rpath'/NEED-EULF-2014-`s'pc-box_`v'_by_year.png", replace + * overall + xtsum `v' if `v'Valid == "V" + * test values for valid - check for valid 0s for example. 
This only happens for gas where: + * gas cons > 100 (so included) but cons < 250 so 'rounded' to nearest 500 = 0 + + * elec always rounded to nearest 50 so min should always be 100 + tabstat `v', by(`v'Valid) s(n mean semean min max) + * by year + * check for 0s + table `v' year if `v' < 1000 + table `v'Valid year, c(count `v' min `v' mean `v' max `v') + + if `do_graphs' { + histogram `v' if `v'Valid == "V", by(year) name(histo_`s'pc_`v') + graph export "`rpath'/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace + graph box `v' if `v'Valid == "V", over(year) name(box_`s'pc_`v') + graph export "`rpath'/NEED-EULF-2014-`s'pc-box_`v'_by_year_valid.png", replace + } } + * check the years where gas = valid but consumption = 0 + * Presumably in these cases gas > 100 but < 249 ? } }