Commit 4637dc68 authored by Ben Anderson's avatar Ben Anderson
Browse files

updated scripts

parent 546623d4
......@@ -100,7 +100,9 @@ local pt104 "mid_terr"
local pt105 "bung"
local pt106 "flat"
* now loop over the energy types
* now loop over the energy types & run linear regression models
* NB - the rounding of the consumption values may lead to modelling problems
local vars "Gcons Econs Allcons"
foreach v of local vars {
* check distributions of original consumption values
......
* Script to analyse DECC's NEED data to:
* investigate % variance of energy consumption due to dwelling type variables as a way to infer the % of variance due to people
* NB this script uses 2 data files derived from the original data using the 'process' script
* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518
* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
* b.anderson@soton.ac.uk
* (c) University of Southampton
* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
* http://creativecommons.org/licenses/by-nc/4.0/
clear all
capture noisily log close
* written for Mac OSX - remember to change filesystem delimiter for other platforms
local home "/Users/ben/Documents"
local proot "`home'/Work/Data/Social Science Datatsets/DECC"
* for clam
* local proot "`home'/Work/NEED"
local dpath "`proot'/NEED/End User Licence File 2014/processed"
local rpath "`proot'/results/NEED/"
local version "v1.1"
set more off
log using "`rpath'/analyse-NEED-EULF-2014-`version'-$S_DATE.smcl", replace
* use the pre-processed wide form file which contains all years of consumption data but not the constant values which are in the xwave file
use "`dpath'/need_eul_may2014_consumptionfile_wide.dta", clear
* we're goinmg to use 2012 data only
keep HH_ID *2012*
* log the consumption as it's very skewed -> becomes semi-normal & OK for linear regression
* Gcons = gas
* Econs = Electricity
* Presumably those without gas use oil or electricity for heating - we don't have oil so we should probably restrict analysis to gas-using hosueholds only to avoid this confounding factor?
* check what's valid
tab Gcons2012Valid Econs2012Valid, mi // what does G,L,M mean? Presumably O = off gas?
tabstat Gcons2012, by(Gcons2012Valid) s(mean min max n)
keep if Gcons2012Valid == "V"
gen log_Gcons2012 = log(Gcons2012)
gen log_Econs2012 = log(Econs2012)
* combine consumption
* treat missing (gas) as 0
egen Allcons2012 = rowtotal(Gcons2012 Econs2012)
gen log_Allcons2012 = log(Allcons2012)
* create log consumption quintiles
egen quinlog_Allcons2012 = cut(log_Allcons2012), group(5)
egen quinlog_Gcons2012 = cut(log_Gcons2012), group(5)
egen quinlog_Econs2012 = cut(log_Econs2012), group(5)
* merge in the pre-processed cross-year fixed values file
merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile.dta"
* fix some of the variables
* combine IMD: this is a bit dodgy as they are not strictly comparable
gen ba_imd = IMD_ENG
replace ba_imd = IMD_WALES if ba_imd == .
* must use as category variables!!
* set unkown to be 10 -> adds to end of contrasts so can see effect
replace LOFT_DEPTH = 10 if LOFT_DEPTH == .
* set unkown to be 2020 -> adds to end of contrasts so can see effect
replace BOILER_YEAR = 2020 if BOILER_YEAR == .
replace CWI_YEAR = 2020 if CWI_YEAR == .
replace LI_YEAR = 2020 if LI_YEAR == .
* 0 = no
destring BOILER, force replace
replace BOILER = 0 if BOILER == .
* household level vars
local generic_hvars "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_TYPE i.PROP_AGE i.EE_BAND "
local generic_hvarsnp "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_AGE i.EE_BAND "
* area level vars
local generic_rvars "i.ba_region i.ba_imd"
* define different property types
local ptypes "101 102 103 104 105 106"
local pt101 "detached"
local pt102 "semi"
local pt103 "end_terr"
local pt104 "mid_terr"
local pt105 "bung"
local pt106 "flat"
* now loop over the energy types
local vars "Gcons Econs Allcons"
foreach v of local vars {
* check distributions of original consumption values
histogram `v'2012, by(MAIN_HEAT_FUEL, total) name(histo_`v')
tabstat `v'2012, by(MAIN_HEAT_FUEL) s(n mean min max)
* all hhs model
qui: regress log_`v'2012 `generic_hvars' ///
`generic_rvars' ///
i.BOILER_YEAR
est store rlog_`v'2012
di "* -> `v' estat to test for heteroskedasticity & omitted vars"
estat ovtest
estat hettest
* we ought to be testing for linearity too
di "* -> `v' linktest to test for model specification"
di "* if p of _hatsq < 0.05 -> mis-spec"
di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm"
linktest
* models by property type - to see if rsq & coefficients vary
foreach p of local ptypes {
di "* -> testing log_`v'2012 for `pt`p''"
qui: regress log_`v'2012 `generic_hvarsnp' ///
`generic_rvars' ///
i.BOILER_YEAR ///
if PROP_TYPE == `p'
est store rlog_`v'2012_`pt`p''
di "* -> `v' 2012 `pt`p'' - estat to test for heteroskedasticity & omitted vars"
estat ovtest
estat hettest
* we ought to be testing for linearity too
di "* -> `v' `pt`p'' linktest to test for model specification"
di "* if p of _hatsq < 0.05 -> mis-spec"
di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm"
linktest
}
* models for different consumption quintiles - to see if rsq & coefficients vary
foreach q of numlist 0/4 {
di "* -> testing log_`v'2012 for quintile: `q'"
qui: regress log_`v'2012 `generic_hvars' ///
`generic_rvars' ///
i.BOILER_YEAR ///
if quinlog_`v'2012 == `q'
est store rlog_`v'2012q`q'
di "* -> quintile: `q' - estat to test for heteroskedasticity & omitted vars"
estat ovtest
estat hettest
* we ought to be testing for linearity too
di "* -> quintile: `q' - linktest"
di "* if p of _hatsq < 0.05 -> mis-spec"
di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm"
linktest
}
}
* output all the results - that's a lot of t tests!
* we could put them all out in one file but it would be really hard to find the ones you want!
estout rlog_Gcons2012 using "`rpath'/NEED-EULF-2014-log-gas-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Gcons2012q* using "`rpath'/NEED-EULF-2014-log-gas-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Gcons2012_* using "`rpath'/NEED-EULF-2014-log-gas-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Econs2012 using "`rpath'/NEED-EULF-2014-log-elec-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Econs2012q* using "`rpath'/NEED-EULF-2014-log-elec-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Econs2012_* using "`rpath'/NEED-EULF-2014-log-elec-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012 using "`rpath'/NEED-EULF-2014-log-energy-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012q* using "`rpath'/NEED-EULF-2014-log-energy-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012_* using "`rpath'/NEED-EULF-2014-log-energy-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
di "* Done!"
log close
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment