*******************************************
* Script to:
* - analyse DECC's EULF 2014 NEED data to examine distributions etc
* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518
* Most recent version of this script can be found at https://github.com/dataknut/DECC-data/tree/master/NEED
* The script requires the following to have been run first:
* https://github.com/dataknut/DECC-data/blob/master/NEED/process-NEED-EULF-2014.do
/*
Copyright (C) 2014 University of Southampton
Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut)
[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License
(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
#YMMV - http://en.wiktionary.org/wiki/YMMV
*/
clear all
capture noisily log close
set more off
* written for Mac OSX - remember to change filesystem delimiter for other platforms
global home "~/Documents"
global dpath "$home/Work/Data/Social Science Datatsets/DECC/NEED/End User Licence File 2014/processed"
global rpath "$home/Work/Papers and Conferences/RSS-2015/results"
local version "v1"
* set sample
* 100 = 100pc
* etc
local sample "100"
* control what happens
local do_desc = 1
* toggle graph drawing
local do_graphs = 1
* ref DECC look up table
lab def GconsValidr 1 "(V)alid" 2 "(O)ff-gas" 3 "(L)Gas < 100" 4 "(G) Gas > 50,000" 5 "M(issing in source)"
* NB DECC look up table says max elec = 50,000
lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,000" 5 "M(issing in source)"
* also be aware that the consumption is rounded in buckets:
/*
GconsYEAR . Missing, off gas or invalid consumption
100 � 7,999 Gas consumption kWh rounded to nearest 500 kWh
8,000- 15,999 Gas consumption kWh rounded to nearest 100 kWh
16,000 � 24,999 Gas consumption kWh rounded to nearest 500 kWh
25,000 � 34,999 Gas consumption kWh rounded to nearest 1,000 kWh
35,000 � 50,000 Gas consumption kWh rounded to nearest 5,000 kWh
EconsYEAR . Missing or invalid consumption
100 - 9,999 Electricity consumption kWh rounded to nearest 50 kWh
10,000 - 11,999 Electricity consumption kWh rounded to nearest 100 kWh
12,000 - 14,999 Electricity consumption kWh rounded to nearest 500 kWh
15,000 - 19,999 Electricity consumption kWh rounded to nearest 1,000 kWh
20,000 - 25,000 Electricity consumption kWh rounded to nearest 5,000 kWh
set more off
*/
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace
di "************************"
di "* Using `sample'% sample"
* load the yearly consumption data
use "$dpath/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear
* merge in the xwave file (fixed data - we assume!)
merge m:1 HH_ID using "$dpath/need_eul_may2014_xwavefile_100pc.dta"
lab var Econs "Electricity (KwH/year)"
lab var Gcons "Gas (KwH/year)"
* set as panel in case it wasn't
* fix format of year so xtset doesn't break
format year %ty
xtset HH_ID year, delta(1 year)
* examine panel status
xtdescribe
* set up
local vars "Econs Gcons"
foreach v of local vars {
di "***************"
di "* Testing `v' for `sample'% sample"
di "* check the panel transitions for each valid"
gen `v'Validr = 1 if `v'Valid == "V"
replace `v'Validr = 2 if `v'Valid == "O" // off gas (from EPC) only relevant for gas
replace `v'Validr = 3 if `v'Valid == "L"
replace `v'Validr = 4 if `v'Valid == "G"
replace `v'Validr = 5 if `v'Valid == "M"
lab var `v'Validr "Recoded `v'Valid"
lab val `v'Validr `v'Validr
di "* Check transitions (`v'Validr)"
xttrans `v'Validr, freq
* set up consumption deciles
levelsof(year), local(levels)
foreach l of local levels {
di "* Calculating consumption deciles for `v' for `l'"
* creates missing for other years have to do this as egen does not allow by
egen `v'_dec_`l' = cut(`v') if year == `l', group(10)
}
* now combine them - set missing option otherwise it counts a row where all are missing as 0
egen `v'_dec = rowtotal(`v'_dec_*), missing
* remove temporary ones
drop `v'_dec_*
* check
tab `v'_dec year
}
* flag dwellings which are off gas for electricity
* NB - in this dataset we don't know if they use electricity as main heat (could be oil)
gen ba_off_gas = 0
replace ba_off_gas = 1 if GconsValidr == 2
lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)"
lab val ba_off_gas ba_off_gas
* check
tabstat Gcons Econs, by(ba_off_gas)
di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
tab ba_off_gas MAIN_HEAT_FUEL, mi // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
table year MAIN_HEAT_FUEL, by(ba_off_gas)
* roughly constant rate throughout years
table year MAIN_HEAT_FUEL, by(ba_off_gas) c(mean Gcons n Gcons)
* but off gas have no gas readings as you'd expect (DECC filter)
foreach v of local vars {
di "***************"
di "* Testing `v' for `sample'% sample"
* overall
xtsum `v' if `v'Valid == "V"
* test values for valid - check for valid 0s for example. This only happens for gas where:
* 100 < gcons < 250 so included but rounded to nearest 500 = 0
* elec always rounded to nearest 50 so min should always be 100
tabstat `v', by(`v'Valid) s(n mean semean min max)
* by year
di "* check `v' for 0s (`s'% sample)"
table `v' year if `v' < 1000
table `v'Valid year, c(count `v' min `v' mean `v' max `v')
if `do_graphs' {
di "* Running graphs - do not keep in memory, just save out"
di "* Running graphs: histo"
histogram `v' if `v'Valid == "V", by(year) scale(0.75)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace
di "* Running graphs: boxes"
graph box `v' if `v'Valid == "V", over(year) scale(0.75)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_over_year_valid.png", replace
graph box `v' if `v'Valid == "V", over(year) by(FLOOR_AREA_BAND) scale(0.75)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_floor_valid.png", replace
graph box `v' if `v'Valid == "V", over(year) by(EE_BAND) scale(0.75)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_ee_valid.png", replace
}
}
di "* Done!"
log close