*******************************************
* Script to:
* - analyse DECC's EULF 2014 NEED data to examine distributions etc

* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518

* Most recent version of this script can be found at https://github.com/dataknut/DECC-data/tree/master/NEED
* The script requires the following to have been run first:
* https://github.com/dataknut/DECC-data/blob/master/NEED/process-NEED-EULF-2014.do

/*

Copyright (C) 2014  University of Southampton

Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut)
	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License
(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

#YMMV - http://en.wiktionary.org/wiki/YMMV

*/

* --- Session housekeeping ---
* never pause output waiting for a -more- keypress
set more off

* close any log left open by a previous run
* (capture = carry on if there is none, noisily = still show any message)
capture noisily log close

* start with nothing in memory
clear all

* --- Configuration: paths, run switches and value labels ---
* written for Mac OSX - remember to change filesystem delimiter for other platforms
global home "~/Documents"

* dpath = folder holding the processed NEED files, rpath = folder the log and graphs are written to
global dpath "$home/Work/Data/Social Science Datatsets/DECC/NEED/End User Licence File 2014/processed"
global rpath "$home/Work/Papers and Conferences/RSS-2015/results"

* version tag - used in the log file name
local version "v1"
* set sample
* 100 = 100pc
* etc
local sample "100"

* control what happens
* NOTE(review): do_desc is not referenced anywhere else in the visible script
local do_desc = 1

* toggle graph drawing
local do_graphs = 1

* ref DECC look up table
lab def GconsValidr 1 "(V)alid" 2 "(O)ff-gas" 3 "(L)Gas < 100" 4 "(G) Gas > 50,000" 5 "M(issing in source)"
* NB DECC look up table says max elec = 50,000
* NOTE(review): the label below and the rounding table later in this file both use 25,000 for electricity - confirm which figure is right
lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,000" 5 "M(issing in source)"

* also be aware that the consumption is rounded in buckets:
/*
GconsYEAR	.	Missing, off gas or invalid consumption
	100 - 7,999	Gas consumption kWh rounded to nearest 500 kWh
	8,000 - 15,999	Gas consumption kWh rounded to nearest 100 kWh
	16,000 - 24,999	Gas consumption kWh rounded to nearest 500 kWh
	25,000 - 34,999	Gas consumption kWh rounded to nearest 1,000 kWh
	35,000 - 50,000	Gas consumption kWh rounded to nearest 5,000 kWh


EconsYEAR	.	Missing or invalid consumption
	100 - 9,999	Electricity consumption kWh rounded to nearest 50 kWh
	10,000 - 11,999	Electricity consumption kWh rounded to nearest 100 kWh
	12,000 - 14,999	Electricity consumption kWh rounded to nearest 500 kWh
	15,000 - 19,999	Electricity consumption kWh rounded to nearest 1,000 kWh
	20,000 - 25,000	Electricity consumption kWh rounded to nearest 5,000 kWh
set more off
*/

* --- Open the log, load the yearly consumption panel and attach the fixed household data ---
* the version tag in the file name keeps logs from different script versions apart
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace

di "************************"
di "* Using `sample'% sample"
* load the yearly consumption data
use "$dpath/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear

* merge in the xwave file (fixed data - we assume!)
* NB: the xwave file name is hard-coded to the 100pc file, whatever the sample local is set to
* NOTE(review): no keep() option is given, so unmatched rows from either file stay in memory - confirm this is intended for samples below 100pc
merge m:1 HH_ID using "$dpath/need_eul_may2014_xwavefile_100pc.dta"

lab var Econs "Electricity (KwH/year)"
lab var Gcons "Gas (KwH/year)"

* set as panel in case it wasn't
* fix format of year so xtset doesn't break
format year %ty
xtset HH_ID year, delta(1 year)

* examine panel status
xtdescribe

* set up
* For each fuel (Econs, Gcons):
*  1. recode the string validity flag into a numeric variable carrying the matching value label
*  2. tabulate how that flag moves from one year to the next within the panel
*  3. build a within-year consumption decile variable
* NB: the vars local is also used by the later descriptives loop
local vars "Econs Gcons"
foreach v of local vars {
	di "***************"
	di "* Testing `v' for `sample'% sample"

	di "* check the panel transitions for each valid"
	* any flag value other than V/O/L/G/M is left as missing
	gen `v'Validr = 1 if `v'Valid == "V"
	replace `v'Validr = 2 if `v'Valid == "O" // off gas (from EPC) only relevant for gas
	replace `v'Validr = 3 if `v'Valid == "L"
	replace `v'Validr = 4 if `v'Valid == "G"
	replace `v'Validr = 5 if `v'Valid == "M"

	lab var `v'Validr "Recoded `v'Valid"
	* the value labels GconsValidr / EconsValidr are defined near the top of the script
	lab val `v'Validr `v'Validr

	di "* Check transitions (`v'Validr)"
	xttrans `v'Validr, freq

	* set up consumption deciles
	levelsof year, local(levels)
	foreach l of local levels {
		di "* Calculating consumption deciles for `v' for `l'"
		* creates missing for other years have to do this as egen does not allow by
		egen `v'_dec_`l' = cut(`v') if year == `l', group(10)
	}
	* now combine them - set missing option otherwise it counts a row where all are missing as 0
	* (each row is non-missing in at most its own year's column, so the row total is just that value)
	egen `v'_dec = rowtotal(`v'_dec_*), missing
	* remove temporary ones
	drop `v'_dec_*
	* check
	tab `v'_dec year
}

* flag dwellings which are off gas for electricity
* NB - in this dataset we don't know if they use electricity as main heat (could be oil)
* 1 where the recoded gas validity flag is 2 (= "O"), 0 for everything else including missing
gen byte ba_off_gas = (GconsValidr == 2)
lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)"
lab val ba_off_gas ba_off_gas

* check
tabstat Gcons Econs, by(ba_off_gas)
di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"

tab ba_off_gas MAIN_HEAT_FUEL, mi // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?

* NOTE(review): the by() and c() options below belong to the pre-Stata 17 -table- syntax - confirm the Stata version this is run under
table year MAIN_HEAT_FUEL, by(ba_off_gas)
* roughly constant rate throughout years
table year MAIN_HEAT_FUEL, by(ba_off_gas) c(mean Gcons n Gcons)
* but off gas have no gas readings as you'd expect (DECC filter)

* --- Describe each fuel's consumption and, if do_graphs is set, export the graphs ---
* Uses the sample local throughout: an earlier version referred to an undefined local s here,
* which expanded to nothing and left the sample size out of the messages and graph file names.
if `do_graphs' {
	* make sure the graph folder exists before exporting; capture = ignore the error if it already does
	capture mkdir "$rpath/graphs"
}
foreach v of local vars {
	di "***************"
	di "* Testing `v' for `sample'% sample"

	* overall
	xtsum `v' if `v'Valid == "V"
	* test values for valid - check for valid 0s for example. This only happens for gas where:
	* 100 < gcons < 250 so included but rounded to nearest 500 = 0
	* elec always rounded to nearest 50 so min should always be 100
	tabstat `v', by(`v'Valid) s(n mean semean min max)
	* by year
	di "* check `v' for 0s (`sample'% sample)"
	table `v' year if `v' < 1000
	table `v'Valid year, c(count `v' min `v' mean `v' max `v')

	if `do_graphs' {
		di "* Running graphs - do not keep in memory, just save out"
		di "* Running graphs: histo"
		histogram `v' if `v'Valid == "V", by(year) scale(0.75)
		graph export "$rpath/graphs/NEED-EULF-2014-`sample'pc-histo_`v'_by_year_valid.png", replace

		di "* Running graphs: boxes"
		graph box `v' if `v'Valid == "V", over(year) scale(0.75)
		graph export "$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_over_year_valid.png", replace

		* same box plot, one panel per floor area band
		graph box `v' if `v'Valid == "V", over(year) by(FLOOR_AREA_BAND) scale(0.75)
		graph export "$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_yr_floor_valid.png", replace

		* same box plot, one panel per EE_BAND value
		graph box `v' if `v'Valid == "V", over(year) by(EE_BAND) scale(0.75)
		graph export "$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_yr_ee_valid.png", replace
	}
}

* mark the end of the run in the output
di "* Done!"

* close the results log opened earlier with -log using-
log close