******************************************* * Script to: * - analyse DECC's EULF 2014 NEED data to examine distributions etc * Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014 * http://discover.ukdataservice.ac.uk/catalogue/?sn=7518 * Most recent version of this script can be found at https://github.com/dataknut/DECC-data/tree/master/NEED * The script requires the following to have been run first: * https://github.com/dataknut/DECC-data/blob/master/NEED/process-NEED-EULF-2014.do /* Copyright (C) 2014 University of Southampton Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) [Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton] This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License (http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. #YMMV - http://en.wiktionary.org/wiki/YMMV */ clear all capture noisily log close set more off * written for Mac OSX - remember to change filesystem delimiter for other platforms global home "~/Documents" global dpath "$home/Work/Data/Social Science Datatsets/DECC/NEED/End User Licence File 2014/processed" global rpath "$home/Work/Papers and Conferences/RSS-2015/results" local version "v1" * set sample * 100 = 100pc * etc local sample "100" * control what happens local do_desc = 1 * toggle graph drawing local do_graphs = 1 * ref DECC look up table lab def GconsValidr 1 "(V)alid" 2 "(O)ff-gas" 3 "(L)Gas < 100" 4 "(G) Gas > 50,000" 5 "M(issing in source)" * NB DECC look up table says max elec = 50,000 lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,000" 5 "M(issing in source)" * also be aware that the consumption is rounded in buckets: /* GconsYEAR . Missing, off gas or invalid consumption 100 � 7,999 Gas consumption kWh rounded to nearest 500 kWh 8,000- 15,999 Gas consumption kWh rounded to nearest 100 kWh 16,000 � 24,999 Gas consumption kWh rounded to nearest 500 kWh 25,000 � 34,999 Gas consumption kWh rounded to nearest 1,000 kWh 35,000 � 50,000 Gas consumption kWh rounded to nearest 5,000 kWh EconsYEAR . Missing or invalid consumption 100 - 9,999 Electricity consumption kWh rounded to nearest 50 kWh 10,000 - 11,999 Electricity consumption kWh rounded to nearest 100 kWh 12,000 - 14,999 Electricity consumption kWh rounded to nearest 500 kWh 15,000 - 19,999 Electricity consumption kWh rounded to nearest 1,000 kWh 20,000 - 25,000 Electricity consumption kWh rounded to nearest 5,000 kWh set more off */ log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace di "************************" di "* Using `sample'% sample" * load the yearly consumption data use "$dpath/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear * merge in the xwave file (fixed data - we assume!) merge m:1 HH_ID using "$dpath/need_eul_may2014_xwavefile_100pc.dta" lab var Econs "Electricity (KwH/year)" lab var Gcons "Gas (KwH/year)" * set as panel in case it wasn't * fix format of year so xtset doesn't break format year %ty xtset HH_ID year, delta(1 year) * examine panel status xtdescribe * set up local vars "Econs Gcons" foreach v of local vars { di "***************" di "* Testing `v' for `sample'% sample" di "* check the panel transitions for each valid" gen `v'Validr = 1 if `v'Valid == "V" replace `v'Validr = 2 if `v'Valid == "O" // off gas (from EPC) only relevant for gas replace `v'Validr = 3 if `v'Valid == "L" replace `v'Validr = 4 if `v'Valid == "G" replace `v'Validr = 5 if `v'Valid == "M" lab var `v'Validr "Recoded `v'Valid" lab val `v'Validr `v'Validr di "* Check transitions (`v'Validr)" xttrans `v'Validr, freq * set up consumption deciles levelsof(year), local(levels) foreach l of local levels { di "* Calculating consumption deciles for `v' for `l'" * creates missing for other years have to do this as egen does not allow by egen `v'_dec_`l' = cut(`v') if year == `l', group(10) } * now combine them - set missing option otherwise it counts a row where all are missing as 0 egen `v'_dec = rowtotal(`v'_dec_*), missing * remove temporary ones drop `v'_dec_* * check tab `v'_dec year } * flag dwellings which are off gas for electricity * NB - in this dataset we don't know if they use electricity as main heat (could be oil) gen ba_off_gas = 0 replace ba_off_gas = 1 if GconsValidr == 2 lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)" lab val ba_off_gas ba_off_gas * check tabstat Gcons Econs, by(ba_off_gas) di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'" tab ba_off_gas MAIN_HEAT_FUEL, mi // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'? table year MAIN_HEAT_FUEL, by(ba_off_gas) * roughly constant rate throughout years table year MAIN_HEAT_FUEL, by(ba_off_gas) c(mean Gcons n Gcons) * but off gas have no gas readings as you'd expect (DECC filter) foreach v of local vars { di "***************" di "* Testing `v' for `sample'% sample" * overall xtsum `v' if `v'Valid == "V" * test values for valid - check for valid 0s for example. This only happens for gas where: * 100 < gcons < 250 so included but rounded to nearest 500 = 0 * elec always rounded to nearest 50 so min should always be 100 tabstat `v', by(`v'Valid) s(n mean semean min max) * by year di "* check `v' for 0s (`s'% sample)" table `v' year if `v' < 1000 table `v'Valid year, c(count `v' min `v' mean `v' max `v') if `do_graphs' { di "* Running graphs - do not keep in memory, just save out" di "* Running graphs: histo" histogram `v' if `v'Valid == "V", by(year) scale(0.75) graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace di "* Running graphs: boxes" graph box `v' if `v'Valid == "V", over(year) scale(0.75) graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_over_year_valid.png", replace graph box `v' if `v'Valid == "V", over(year) by(FLOOR_AREA_BAND) scale(0.75) graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_floor_valid.png", replace graph box `v' if `v'Valid == "V", over(year) by(EE_BAND) scale(0.75) graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_ee_valid.png", replace } } di "* Done!" log close