diff --git a/NEED/analyse-NEED-EULF-2014-models.do b/NEED/analyse-NEED-EULF-2014-models.do index aa29c50f9227da28042d29ff0583a9fc7857cd68..afa9f7818788728f3e236906810c09d74902f55f 100644 --- a/NEED/analyse-NEED-EULF-2014-models.do +++ b/NEED/analyse-NEED-EULF-2014-models.do @@ -100,7 +100,9 @@ local pt104 "mid_terr" local pt105 "bung" local pt106 "flat" -* now loop over the energy types +* now loop over the energy types & run linear regression models +* NB - the rounding of the consumption values may lead to modelling problems + local vars "Gcons Econs Allcons" foreach v of local vars { * check distributions of original consumption values diff --git a/NEED/analyse-NEED-EULF-2014.do b/NEED/analyse-NEED-EULF-2014.do deleted file mode 100644 index 9fdac3a9d7c03e063a3fe957edd419f695aee8a5..0000000000000000000000000000000000000000 --- a/NEED/analyse-NEED-EULF-2014.do +++ /dev/null @@ -1,181 +0,0 @@ -* Script to analyse DECC's NEED data to: -* investigate % variance of energy consumption due to dwelling type variables as a way to infer the % of variance due to people - -* NB this script uses 2 data files derived from the original data using the 'process' script - -* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014 -* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518 - -* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton -* b.anderson@soton.ac.uk -* (c) University of Southampton - -* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies -* http://creativecommons.org/licenses/by-nc/4.0/ - -clear all - -capture noisily log close - -* written for Mac OSX - remember to change filesystem delimiter for other platforms -local home "/Users/ben/Documents" -local proot "`home'/Work/Data/Social Science Datatsets/DECC" -* for clam -* local proot "`home'/Work/NEED" -local dpath "`proot'/NEED/End User Licence File 2014/processed" -local rpath "`proot'/results/NEED/" - -local version "v1.1" - -set more off - -log using "`rpath'/analyse-NEED-EULF-2014-`version'-$S_DATE.smcl", replace - -* use the pre-processed wide form file which contains all years of consumption data but not the constant values which are in the xwave file -use "`dpath'/need_eul_may2014_consumptionfile_wide.dta", clear - -* we're goinmg to use 2012 data only - -keep HH_ID *2012* - -* log the consumption as it's very skewed -> becomes semi-normal & OK for linear regression -* Gcons = gas -* Econs = Electricity -* Presumably those without gas use oil or electricity for heating - we don't have oil so we should probably restrict analysis to gas-using hosueholds only to avoid this confounding factor? - -* check what's valid -tab Gcons2012Valid Econs2012Valid, mi // what does G,L,M mean? Presumably O = off gas? -tabstat Gcons2012, by(Gcons2012Valid) s(mean min max n) - -keep if Gcons2012Valid == "V" - -gen log_Gcons2012 = log(Gcons2012) -gen log_Econs2012 = log(Econs2012) - -* combine consumption -* treat missing (gas) as 0 -egen Allcons2012 = rowtotal(Gcons2012 Econs2012) - -gen log_Allcons2012 = log(Allcons2012) -* create log consumption quintiles -egen quinlog_Allcons2012 = cut(log_Allcons2012), group(5) -egen quinlog_Gcons2012 = cut(log_Gcons2012), group(5) -egen quinlog_Econs2012 = cut(log_Econs2012), group(5) - -* merge in the pre-processed cross-year fixed values file -merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile.dta" - -* fix some of the variables - -* combine IMD: this is a bit dodgy as they are not strictly comparable -gen ba_imd = IMD_ENG -replace ba_imd = IMD_WALES if ba_imd == . - -* must use as category variables!! -* set unkown to be 10 -> adds to end of contrasts so can see effect -replace LOFT_DEPTH = 10 if LOFT_DEPTH == . - -* set unkown to be 2020 -> adds to end of contrasts so can see effect -replace BOILER_YEAR = 2020 if BOILER_YEAR == . -replace CWI_YEAR = 2020 if CWI_YEAR == . -replace LI_YEAR = 2020 if LI_YEAR == . - -* 0 = no -destring BOILER, force replace -replace BOILER = 0 if BOILER == . - -* household level vars -local generic_hvars "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_TYPE i.PROP_AGE i.EE_BAND " -local generic_hvarsnp "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_AGE i.EE_BAND " - -* area level vars -local generic_rvars "i.ba_region i.ba_imd" - -* define different property types -local ptypes "101 102 103 104 105 106" -local pt101 "detached" -local pt102 "semi" -local pt103 "end_terr" -local pt104 "mid_terr" -local pt105 "bung" -local pt106 "flat" - -* now loop over the energy types -local vars "Gcons Econs Allcons" -foreach v of local vars { - * check distributions of original consumption values - histogram `v'2012, by(MAIN_HEAT_FUEL, total) name(histo_`v') - tabstat `v'2012, by(MAIN_HEAT_FUEL) s(n mean min max) - - * all hhs model - qui: regress log_`v'2012 `generic_hvars' /// - `generic_rvars' /// - i.BOILER_YEAR - - est store rlog_`v'2012 - di "* -> `v' estat to test for heteroskedasticity & omitted vars" - estat ovtest - estat hettest - - * we ought to be testing for linearity too - di "* -> `v' linktest to test for model specification" - di "* if p of _hatsq < 0.05 -> mis-spec" - di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm" - linktest - - * models by property type - to see if rsq & coefficients vary - foreach p of local ptypes { - di "* -> testing log_`v'2012 for `pt`p''" - qui: regress log_`v'2012 `generic_hvarsnp' /// - `generic_rvars' /// - i.BOILER_YEAR /// - if PROP_TYPE == `p' - est store rlog_`v'2012_`pt`p'' - - di "* -> `v' 2012 `pt`p'' - estat to test for heteroskedasticity & omitted vars" - estat ovtest - estat hettest - - * we ought to be testing for linearity too - di "* -> `v' `pt`p'' linktest to test for model specification" - di "* if p of _hatsq < 0.05 -> mis-spec" - di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm" - linktest - } - * models for different consumption quintiles - to see if rsq & coefficients vary - foreach q of numlist 0/4 { - di "* -> testing log_`v'2012 for quintile: `q'" - qui: regress log_`v'2012 `generic_hvars' /// - `generic_rvars' /// - i.BOILER_YEAR /// - if quinlog_`v'2012 == `q' - est store rlog_`v'2012q`q' - - di "* -> quintile: `q' - estat to test for heteroskedasticity & omitted vars" - estat ovtest - estat hettest - - * we ought to be testing for linearity too - di "* -> quintile: `q' - linktest" - di "* if p of _hatsq < 0.05 -> mis-spec" - di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm" - linktest - } -} - -* output all the results - that's a lot of t tests! -* we could put them all out in one file but it would be really hard to find the ones you want! - -estout rlog_Gcons2012 using "`rpath'/NEED-EULF-2014-log-gas-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout rlog_Gcons2012q* using "`rpath'/NEED-EULF-2014-log-gas-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout rlog_Gcons2012_* using "`rpath'/NEED-EULF-2014-log-gas-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout rlog_Econs2012 using "`rpath'/NEED-EULF-2014-log-elec-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout rlog_Econs2012q* using "`rpath'/NEED-EULF-2014-log-elec-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout rlog_Econs2012_* using "`rpath'/NEED-EULF-2014-log-elec-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout rlog_Allcons2012 using "`rpath'/NEED-EULF-2014-log-energy-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout rlog_Allcons2012q* using "`rpath'/NEED-EULF-2014-log-energy-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout rlog_Allcons2012_* using "`rpath'/NEED-EULF-2014-log-energy-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) - -di "* Done!" - -log close