analyse-NEED-EULF-2014.do 7.18 KB
Newer Older
1
2
3
4
* Script to analyse DECC's NEED data to:
* investigate % variance of energy consumption due to dwelling type variables as a way to infer the % of variance due to people

* NB this script uses 2 data files derived from the original data using the 'process' script
Ben Anderson's avatar
Ben Anderson committed
5
6

* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
7
* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518
Ben Anderson's avatar
Ben Anderson committed
8
9
10
11
12

* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
* b.anderson@soton.ac.uk
* (c) University of Southampton

Ben Anderson's avatar
Ben Anderson committed
13
* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
Ben Anderson's avatar
Ben Anderson committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
* http://creativecommons.org/licenses/by-nc/4.0/

clear all

capture noisily log close

* written for Mac OSX - remember to change filesystem delimiter for other platforms
local home "/Users/ben/Documents"
local proot "`home'/Work/Data/Social Science Datatsets/DECC"
* for clam
* local proot "`home'/Work/NEED"
local dpath "`proot'/NEED/End User Licence File 2014/processed"
local rpath "`proot'/results/NEED/"

28
local version "v1.1"
Ben Anderson's avatar
Ben Anderson committed
29
30
31
32
33
34
35
36
37
38
39
40

set more off

log using "`rpath'/analyse-NEED-EULF-2014-`version'-$S_DATE.smcl", replace

* use the pre-processed wide form file which contains all years of consumption data but not the constant values which are in the xwave file
use "`dpath'/need_eul_may2014_consumptionfile_wide.dta", clear

* we're goinmg to use 2012 data only

keep HH_ID *2012*

41
42
43
44
45
46
47
48
49
50
51
* log the consumption as it's very skewed -> becomes semi-normal & OK for linear regression
* Gcons = gas
* Econs = Electricity
* Presumably those without gas use oil or electricity for heating - we don't have oil so we should probably restrict analysis to gas-using hosueholds only to avoid this confounding factor?

* check what's valid
tab Gcons2012Valid Econs2012Valid, mi // what does G,L,M mean? Presumably O = off gas?
tabstat Gcons2012, by(Gcons2012Valid) s(mean min max n)

keep if Gcons2012Valid == "V"

Ben Anderson's avatar
Ben Anderson committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
gen log_Gcons2012 = log(Gcons2012)
gen log_Econs2012 = log(Econs2012)

* combine consumption
* treat missing (gas) as 0
egen Allcons2012 = rowtotal(Gcons2012 Econs2012)

gen log_Allcons2012 = log(Allcons2012)
* create log consumption quintiles
egen quinlog_Allcons2012 = cut(log_Allcons2012), group(5)
egen quinlog_Gcons2012 = cut(log_Gcons2012), group(5)
egen quinlog_Econs2012 = cut(log_Econs2012), group(5)

* merge in the pre-processed cross-year fixed values file
merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile.dta"

* fix some of the variables

* combine IMD: this is a bit dodgy as they are not strictly comparable
gen ba_imd = IMD_ENG
replace ba_imd = IMD_WALES if ba_imd == .

* must use as category variables!!
* set unkown to be 10 -> adds to end of contrasts so can see effect
replace LOFT_DEPTH = 10 if LOFT_DEPTH == .

* set unkown to be 2020 -> adds to end of contrasts so can see effect
replace BOILER_YEAR = 2020 if BOILER_YEAR == .
replace CWI_YEAR = 2020 if CWI_YEAR == .
replace LI_YEAR  = 2020 if LI_YEAR  == .

* 0 = no
destring BOILER, force replace
replace BOILER = 0 if BOILER == .

* household level vars
local generic_hvars "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_TYPE i.PROP_AGE i.EE_BAND "
local generic_hvarsnp "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_AGE i.EE_BAND "

* area level vars
local generic_rvars "i.ba_region i.ba_imd"

* define different property types
local ptypes "101 102 103 104 105 106"
local pt101 "detached"
local pt102 "semi"
local pt103 "end_terr"
local pt104 "mid_terr"
local pt105 "bung"
local pt106 "flat"

103
* now loop over the energy types
Ben Anderson's avatar
Ben Anderson committed
104
105
local vars "Gcons Econs Allcons"
foreach v of local vars {
106
107
108
109
	* check distributions of original consumption values
	histogram `v'2012, by(MAIN_HEAT_FUEL, total) name(histo_`v')
	tabstat `v'2012, by(MAIN_HEAT_FUEL) s(n mean min max)
	
Ben Anderson's avatar
Ben Anderson committed
110
111
112
113
114
115
116
117
118
119
	* all hhs model
	qui: regress log_`v'2012 `generic_hvars' ///
		`generic_rvars' ///
		i.BOILER_YEAR
	
	est store rlog_`v'2012
	di "* -> `v' estat to test for heteroskedasticity & omitted vars"
	estat ovtest
	estat hettest
	
120
	* we ought to be testing for linearity too
121
	di "* -> `v' linktest to test for model specification"
Ben Anderson's avatar
Ben Anderson committed
122
	di "* if p of _hatsq < 0.05 -> mis-spec"
123
	di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm"
Ben Anderson's avatar
Ben Anderson committed
124
125
	linktest	
	
126
	* models by property type - to see if rsq & coefficients vary
Ben Anderson's avatar
Ben Anderson committed
127
128
129
130
131
132
133
134
135
136
137
138
	foreach p of local ptypes {
		di "* -> testing log_`v'2012 for `pt`p''"
		qui: regress log_`v'2012 `generic_hvarsnp' ///
			`generic_rvars'	///
			i.BOILER_YEAR ///
			if PROP_TYPE == `p'
		est store rlog_`v'2012_`pt`p''
		
		di "* -> `v' 2012 `pt`p'' - estat to test for heteroskedasticity & omitted vars"
		estat ovtest
		estat hettest
		
139
		* we ought to be testing for linearity too
140
		di "* -> `v' `pt`p'' linktest to test for model specification"
Ben Anderson's avatar
Ben Anderson committed
141
		di "* if p of _hatsq < 0.05 -> mis-spec"
142
		di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm"
Ben Anderson's avatar
Ben Anderson committed
143
144
		linktest	
	}
145
	* models for different consumption quintiles - to see if rsq & coefficients vary
Ben Anderson's avatar
Ben Anderson committed
146
147
148
149
150
151
152
153
154
155
156
157
	foreach q of numlist 0/4 {
		di "* -> testing log_`v'2012 for quintile: `q'"
		qui: regress log_`v'2012 `generic_hvars' ///
			`generic_rvars'	///
			i.BOILER_YEAR ///
			if quinlog_`v'2012 == `q'
		est store rlog_`v'2012q`q'
		
		di "* -> quintile: `q' - estat to test for heteroskedasticity & omitted vars"
		estat ovtest
		estat hettest
		
158
		* we ought to be testing for linearity too
Ben Anderson's avatar
Ben Anderson committed
159
160
		di "* -> quintile: `q' - linktest"
		di "* if p of _hatsq < 0.05 -> mis-spec"
161
		di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm"
Ben Anderson's avatar
Ben Anderson committed
162
163
164
165
166
		linktest	
	}
}

* output all the results - that's a lot of t tests!
167
168
* we could put them all out in one file but it would be really hard to find the ones you want!

Ben Anderson's avatar
Ben Anderson committed
169
170
171
estout rlog_Gcons2012 using "`rpath'/NEED-EULF-2014-log-gas-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Gcons2012q* using "`rpath'/NEED-EULF-2014-log-gas-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Gcons2012_* using "`rpath'/NEED-EULF-2014-log-gas-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
172
173
estout rlog_Econs2012 using "`rpath'/NEED-EULF-2014-log-elec-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Econs2012q* using "`rpath'/NEED-EULF-2014-log-elec-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
Ben Anderson's avatar
Ben Anderson committed
174
175
176
177
estout rlog_Econs2012_* using "`rpath'/NEED-EULF-2014-log-elec-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012 using "`rpath'/NEED-EULF-2014-log-energy-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012q* using "`rpath'/NEED-EULF-2014-log-energy-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012_* using "`rpath'/NEED-EULF-2014-log-energy-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
178
179
180
181

di "* Done!"

log close