analyse-NEED-EULF-2014.do 6.82 KB
Newer Older
1
2
3
4
* Script to analyse DECC's NEED data to:
* investigate % variance of energy consumption due to dwelling type variables as a way to infer the % of variance due to people

* NB this script uses 2 data files derived from the original data using the 'process' script
Ben Anderson's avatar
Ben Anderson committed
5
6

* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
7
* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518
Ben Anderson's avatar
Ben Anderson committed
8
9
10
11
12

* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
* b.anderson@soton.ac.uk
* (c) University of Southampton

Ben Anderson's avatar
Ben Anderson committed
13
* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
Ben Anderson's avatar
Ben Anderson committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
* http://creativecommons.org/licenses/by-nc/4.0/

clear all

capture noisily log close

* written for Mac OSX - remember to change filesystem delimiter for other platforms
local home "/Users/ben/Documents"
local proot "`home'/Work/Data/Social Science Datatsets/DECC"
* for clam
* local proot "`home'/Work/NEED"
local dpath "`proot'/NEED/End User Licence File 2014/processed"
local rpath "`proot'/results/NEED/"

28
local version "v1.1"
Ben Anderson's avatar
Ben Anderson committed
29
30
31
32
33
34
35
36
37
38
39
40

set more off

log using "`rpath'/analyse-NEED-EULF-2014-`version'-$S_DATE.smcl", replace

* use the pre-processed wide form file which contains all years of consumption data but not the constant values which are in the xwave file
use "`dpath'/need_eul_may2014_consumptionfile_wide.dta", clear

* we're goinmg to use 2012 data only

keep HH_ID *2012*

41
42
43
44
45
46
47
48
49
50
51
* log the consumption as it's very skewed -> becomes semi-normal & OK for linear regression
* Gcons = gas
* Econs = Electricity
* Presumably those without gas use oil or electricity for heating - we don't have oil so we should probably restrict analysis to gas-using hosueholds only to avoid this confounding factor?

* check what's valid
tab Gcons2012Valid Econs2012Valid, mi // what does G,L,M mean? Presumably O = off gas?
tabstat Gcons2012, by(Gcons2012Valid) s(mean min max n)

keep if Gcons2012Valid == "V"

Ben Anderson's avatar
Ben Anderson committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
gen log_Gcons2012 = log(Gcons2012)
gen log_Econs2012 = log(Econs2012)

* combine consumption
* treat missing (gas) as 0
egen Allcons2012 = rowtotal(Gcons2012 Econs2012)

gen log_Allcons2012 = log(Allcons2012)
* create log consumption quintiles
egen quinlog_Allcons2012 = cut(log_Allcons2012), group(5)
egen quinlog_Gcons2012 = cut(log_Gcons2012), group(5)
egen quinlog_Econs2012 = cut(log_Econs2012), group(5)

* merge in the pre-processed cross-year fixed values file
merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile.dta"

* fix some of the variables

* combine IMD: this is a bit dodgy as they are not strictly comparable
gen ba_imd = IMD_ENG
replace ba_imd = IMD_WALES if ba_imd == .

* must use as category variables!!
* set unkown to be 10 -> adds to end of contrasts so can see effect
replace LOFT_DEPTH = 10 if LOFT_DEPTH == .

* set unkown to be 2020 -> adds to end of contrasts so can see effect
replace BOILER_YEAR = 2020 if BOILER_YEAR == .
replace CWI_YEAR = 2020 if CWI_YEAR == .
replace LI_YEAR  = 2020 if LI_YEAR  == .

* 0 = no
destring BOILER, force replace
replace BOILER = 0 if BOILER == .

* household level vars
local generic_hvars "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_TYPE i.PROP_AGE i.EE_BAND "
local generic_hvarsnp "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_AGE i.EE_BAND "

* area level vars
local generic_rvars "i.ba_region i.ba_imd"

* define different property types
local ptypes "101 102 103 104 105 106"
local pt101 "detached"
local pt102 "semi"
local pt103 "end_terr"
local pt104 "mid_terr"
local pt105 "bung"
local pt106 "flat"

103
* now loop over the energy types
Ben Anderson's avatar
Ben Anderson committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
local vars "Gcons Econs Allcons"
foreach v of local vars {
	* all hhs model
	qui: regress log_`v'2012 `generic_hvars' ///
		`generic_rvars' ///
		i.BOILER_YEAR
	
	est store rlog_`v'2012
	di "* -> `v' estat to test for heteroskedasticity & omitted vars"
	estat ovtest
	estat hettest
	
	di "* -> `v' linktest"
	di "* if p of _hatsq < 0.05 -> mis-spec"
	di "* http://www.ats.ucla.edu/stat/stata/webbooks/logistic/chapter3/statalog3.htm"
	linktest	
	
121
	* models by property type - to see if rsq & coefficients vary
Ben Anderson's avatar
Ben Anderson committed
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
	foreach p of local ptypes {
		di "* -> testing log_`v'2012 for `pt`p''"
		qui: regress log_`v'2012 `generic_hvarsnp' ///
			`generic_rvars'	///
			i.BOILER_YEAR ///
			if PROP_TYPE == `p'
		est store rlog_`v'2012_`pt`p''
		
		di "* -> `v' 2012 `pt`p'' - estat to test for heteroskedasticity & omitted vars"
		estat ovtest
		estat hettest
		
		di "* -> `v' `pt`p'' linktest"
		di "* if p of _hatsq < 0.05 -> mis-spec"
		di "* http://www.ats.ucla.edu/stat/stata/webbooks/logistic/chapter3/statalog3.htm"
		linktest	
	}
139
	* models for different consumption quintiles - to see if rsq & coefficients vary
Ben Anderson's avatar
Ben Anderson committed
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
	foreach q of numlist 0/4 {
		di "* -> testing log_`v'2012 for quintile: `q'"
		qui: regress log_`v'2012 `generic_hvars' ///
			`generic_rvars'	///
			i.BOILER_YEAR ///
			if quinlog_`v'2012 == `q'
		est store rlog_`v'2012q`q'
		
		di "* -> quintile: `q' - estat to test for heteroskedasticity & omitted vars"
		estat ovtest
		estat hettest
		
		di "* -> quintile: `q' - linktest"
		di "* if p of _hatsq < 0.05 -> mis-spec"
		di "* http://www.ats.ucla.edu/stat/stata/webbooks/logistic/chapter3/statalog3.htm"
		linktest	
	}
}

* output all the results - that's a lot of t tests!
160
161
* we could put them all out in one file but it would be really hard to find the ones you want!

Ben Anderson's avatar
Ben Anderson committed
162
163
164
165
166
167
168
169
170
estout rlog_Gcons2012 using "`rpath'/NEED-EULF-2014-log-gas-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Gcons2012q* using "`rpath'/NEED-EULF-2014-log-gas-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Gcons2012_* using "`rpath'/NEED-EULF-2014-log-gas-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Econs2012 using "`rpath'/NEED-EULF-2014-log-elec-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 N ll)
estout rlog_Econs2012q* using "`rpath'/NEED-EULF-2014-log-elec-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 N ll)
estout rlog_Econs2012_* using "`rpath'/NEED-EULF-2014-log-elec-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012 using "`rpath'/NEED-EULF-2014-log-energy-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012q* using "`rpath'/NEED-EULF-2014-log-energy-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012_* using "`rpath'/NEED-EULF-2014-log-energy-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
171
172
173
174

di "* Done!"

log close