diff --git a/NEED/analyse-NEED-EULF-2014-electricity-consumption.do b/NEED/analyse-NEED-EULF-2014-electricity-consumption.do index a4f945de8a014da4dfec65329b839adf11ed1501..96635e0739f904cf71db24a904a15dd81fc8a74a 100644 --- a/NEED/analyse-NEED-EULF-2014-electricity-consumption.do +++ b/NEED/analyse-NEED-EULF-2014-electricity-consumption.do @@ -35,11 +35,8 @@ GNU General Public License for more details. * Requires: estout - clear all -capture noisily log close - set more off * written for Mac OSX - remember to change filesystem delimiter for other platforms @@ -47,11 +44,16 @@ global home "~" global dpath "$home/Documents/Work/Data/Social Science Datasets/DECC/NEED/End User Licence File 2014/processed" -global rpath "$home/Dropbox/RSS-2015/results" +*global rpath "$home/Dropbox/RSS-2015/NEED/results" +global rpath "$home/Documents/Work/Papers and Conferences/RSS-2015/NEED/results" -local version "v1" +local version "v1" // version management via github -log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace +capture noisily log close _all + +* start main log +* each subsection has own log +log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_main.smcl", replace name(main) * set sample * 100 = 100pc @@ -69,7 +71,8 @@ local need_weight = "[pw = WEIGHT]" // use probability weights local do_desc = 0 // do descriptives local do_histo = 0 // toggle graph drawing local do_box = 0 // toggle graph drawing -local do_xsec = 1 // run cross-sectional analysis +local do_excl = 0 // tests for correlates with exclusion +local do_xsec = 0 // run cross-sectional analysis local do_longit = 1 // run longitudinal analysis @@ -111,13 +114,19 @@ lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,0 * set up correct long form 'is X present in year' using year (if known) local vars "BOILER LI CWI" -* what will hapen if there are multiple replacements in a household +* what will happen if there are multiple 
replacements in a household? foreach v of local vars { gen ba_have_`v' = 0 destring `v'_YEAR, force replace replace ba_have_`v' = 1 if `v'_YEAR <= year } +* make sure test vars are destringed +local tvars "IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER" +foreach tv of local tvars { + destring `tv', force replace +} + * labels lab var Econs "Electricity (kWh/year)" lab var Gcons "Gas (kWh/year)" @@ -140,6 +149,13 @@ foreach v of local setupvars { lab val `v'Validr `v'Validr tab `v'Validr `v'Valid + + gen `v'_excl_hi = 0 if `v'Validr == 1 // valid + replace `v'_excl_hi = 1 if `v'Validr == 4 // high + gen `v'_excl_lo = 0 if `v'Validr == 1 // valid + replace `v'_excl_lo = 1 if `v'Validr == 3 // low + + * set up consumption deciles and outlier flags gen u99_`v' = 0 gen l99_`v' = 0 @@ -149,7 +165,7 @@ foreach v of local setupvars { levelsof(year), local(levels) foreach l of local levels { di "* Calculating consumption deciles and outlier flags for `v' for `l'" - + * creates missing for other years have to do this as egen does not allow by & can't 'replace' di "* `v' deciles for `l'" egen `v'_dec_`l' = cut(`v') if year == `l', group(10) @@ -173,6 +189,7 @@ foreach v of local setupvars { *tab m90_`v' if year == `l', mi } + * now combine the deciles - set missing option otherwise it counts a row where all are missing as 0 egen `v'_dec = rowtotal(`v'_dec_*), missing * remove temporary ones @@ -190,9 +207,6 @@ foreach v of local setupvars { lab var log`v' "Log `v'" } -* set 'survey' weight -svyset `need_weight' - * flag dwellings which are off gas for electricity * NB - in this dataset we don't know if they use electricity as main heat (could be oil) gen ba_off_gas = 0 @@ -200,19 +214,25 @@ replace ba_off_gas = 1 if GconsValidr == 2 lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)" lab val ba_off_gas ba_off_gas -* check -svy: mean Gcons Econs, over(ba_off_gas) -di "* 
MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'" +* add Gcons to loop over gas +local descvars "Econs" +if `do_desc' { + log off main + log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'-do_desc.smcl", replace name(do_desc) + + * set 'survey' weight + svyset `need_weight' + * check + svy: mean Gcons Econs, over(ba_off_gas) + di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'" -table ba_off_gas MAIN_HEAT_FUEL `need_weight', missing // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'? + table ba_off_gas MAIN_HEAT_FUEL `need_weight', missing // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'? -table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) -* roughly constant rate throughout years -table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) c(mean Gcons n Gcons) -* but off gas have no gas readings as you'd expect (DECC applied filter) + table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) + * roughly constant rate throughout years + table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) c(mean Gcons n Gcons) + * but off gas have no gas readings as you'd expect (DECC applied filter) -local descvars "Econs" -if `do_desc' { foreach v of local descvars { di "***************" di "* Testing `v' for `sample'% sample" @@ -268,26 +288,72 @@ if `do_desc' { graph export "$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_yr_ee_valid.png", replace } - - di "* check the distributions of the outliers" - local tvars "MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG" - foreach tv of local tvars { - di "* Checking top 1% against `tv'" - tab `tv' u99_Econs , col - } - + } + log close do_desc + log on main +} +if `do_excl' { + log off main + log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_excl.smcl", 
replace name(do_excl) + di "* running exclusion analysis" + di "* check the distributions of the outliers" + di "* % excluded as too high" + tab year Econs_excl_hi + di "* % excluded as too low" + tab year Econs_excl_lo + di "* exclusion models" + + local tvars "MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG" + foreach tv of local tvars { + di "* Checking Econs lowest 1% (+L) against `tv'" + tab `tv' l99_Econs , col // NB: l99_Econs assumed to flag the bottom 1% (u99_Econs = top 1%) - confirm + di "* Checking Econs highest 1% (+G) against `tv'" + tab `tv' u99_Econs , col + + di "* Checking Econs L against `tv'" + tab `tv' Econs_excl_lo , col + di "* Checking Econs G against `tv'" + tab `tv' Econs_excl_hi , col + } + + levelsof(year), local(years) + foreach y of local years { + di "* testing exclusions for `y'" + * use capture to avoid models failing where no exclusions (2010 ->) + capture noisily { + di "* Hi" + qui: logit Econs_excl_hi i.E7Flag2012 i.MAIN_HEAT_FUEL i.PROP_AGE /// + i.PROP_TYPE i.FLOOR_AREA_BAND i.EE_BAND ba_off_gas /// + if year == `y' + estat gof + est store Econs_excl_hi_`y' + } + capture noisily { + di "* Lo" + qui: logit Econs_excl_lo i.E7Flag2012 i.MAIN_HEAT_FUEL i.PROP_AGE /// + i.PROP_TYPE i.FLOOR_AREA_BAND i.EE_BAND ba_off_gas /// + if year == `y' + estat gof + est store Econs_excl_lo_`y' + } } + estout Econs_excl_hi* using "$rpath/logit_Econs_excl_hi_`version'.txt", /// + cells("b se ci_u ci_l _star") /// + stats(r2_p ll N) replace + estout Econs_excl_lo* using "$rpath/logit_Econs_excl_lo_`version'.txt", /// + cells("b se ci_u ci_l _star") /// + stats(r2_p ll N) replace + + log close do_excl + log on main } if `do_xsec' { - di "* Running cross sectional analysis using 2012 (all valid cases)" - * make sure test vars are destringed - local tvars "IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER" - foreach tv of local tvars { - destring `tv', force replace - } + log off main + log using 
"$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_xsec.smcl", replace name(do_xsec) + di "* Running cross sectional analysis for electricity using 2012 (all valid cases)" * kitchen sink model - use 99% signif level * use vce(robust) as running without & using hettest suggests heteroscedasticity * regress would throw out collinear variables @@ -309,11 +375,11 @@ if `do_xsec' { * Plot residuals * pnormal plot of residuals pnorm logEconsr, name(pnorm_logEconsr) - graph export "$rpath/graphs/pnorm_logEconsr.png", replace + graph export "$rpath/graphs/pnorm_logEconsr_2012.png", replace * qnormal plot qnorm logEconsr, name(qnorm_logEconsr) - graph export "$rpath/graphs/qnorm_logEconsr.png", replace + graph export "$rpath/graphs/qnorm_logEconsr_2012.png", replace * test the null hypothesis that the variance of the residuals is homogenous. * Therefore, if the p-value is very small, we would have to reject the hypothesis @@ -348,10 +414,21 @@ if `do_xsec' { * test BIC etc estat ic + + * produce margins plot for floor area + margins i.FLOOR_AREA_BAND + marginsplot + + graph box Econs if year == 2012, over(FLOOR_AREA_BAND) + graph export "$rpath/graphs/box_Econs_floor_area_2012.png", replace + log close do_xsec + log on main } if `do_longit' { + log off main + log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_longit.smcl", replace name(do_longit) di "* Running longitudinal analysis" di "* Check boiler transitions" xttrans ba_have_BOILER, freq @@ -415,8 +492,10 @@ if `do_longit' { est store xtr_re_logEcons estout xtr_re_logEcons using "$rpath/xtr_re_logEcons_`version'.txt", cells("b se ci_u ci_l _star ") stats(r2_w r2_b r2_o rmse N sigma_u sigma_e, fmt(%9.3f %9.0g)) replace } + log close do_longit + log on main } di "* Done!" -log close +log close main