diff --git a/NEED/analyse-NEED-EULF-2014-electricity-consumption.do b/NEED/analyse-NEED-EULF-2014-electricity-consumption.do new file mode 100644 index 0000000000000000000000000000000000000000..3c966696dc0e9e27b2cbfac1025ff1f8be44ba6c --- /dev/null +++ b/NEED/analyse-NEED-EULF-2014-electricity-consumption.do @@ -0,0 +1,124 @@ +******************************************* +* Script to: +* - analyse DECC's EULF 2014 NEED data to examine distributions etc + +* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014 +* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518 + +* Most recent version of this script can be found at https://github.com/dataknut/DECC-data/tree/master/NEED +* The script requires the following to have been run first: +* https://github.com/dataknut/DECC-data/blob/master/NEED/process-NEED-EULF-2014.do + +/* + +Copyright (C) 2014 University of Southampton + +Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) + [Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton] + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License +(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +#YMMV - http://en.wiktionary.org/wiki/YMMV + +*/ + +clear all + +capture noisily log close + +set more off + +* written for Mac OSX - remember to change filesystem delimiter for other platforms +global home "/Users/ben/Documents" + +local dpath "$home/Work/Data/Social Science Datatsets/DECC/NEED/End User Licence File 2014/processed" +local rpath "$home/Work/Papers and Conferences/RSS-2015/results" + +local version "v1" +* set sample +* 100 = 100pc +* etc +local sample "100" + +* control what happens +local do_desc = 1 + +* toggle graph drawing +local do_graphs = 1 + +* ref DECC look up table +lab def GconsValidr 1 "(V)alid" 2 "(O)ff-gas" 3 "(L)Gas < 100" 4 "(G) Gas > 50,000" 5 "M(issing in source)" +* NB DECC look up table says max elec = 50,000 +lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,000" 5 "M(issing in source)" + +* also be aware that the consumption is rounded in buckets: +/* +GconsYEAR . Missing, off gas or invalid consumption 100 Ð 7,999 Gas consumption kWh rounded to nearest 500 kWh 8,000- 15,999 Gas consumption kWh rounded to nearest 100 kWh 16,000 Ð 24,999 Gas consumption kWh rounded to nearest 500 kWh 25,000 Ð 34,999 Gas consumption kWh rounded to nearest 1,000 kWh 35,000 Ð 50,000 Gas consumption kWh rounded to nearest 5,000 kWh EconsYEAR . Missing or invalid consumption 100 - 9,999 Electricity consumption kWh rounded to nearest 50 kWh 10,000 - 11,999 Electricity consumption kWh rounded to nearest 100 kWh 12,000 - 14,999 Electricity consumption kWh rounded to nearest 500 kWh 15,000 - 19,999 Electricity consumption kWh rounded to nearest 1,000 kWh 20,000 - 25,000 Electricity consumption kWh rounded to nearest 5,000 kWh +set more off +*/ + +log using "`rpath'/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace + +if `do_desc' { + di "************************" + di "* Using `sample'% sample" + use "`dpath'/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear + + * set as panel in case it wasn't + xtset HH_ID year + + * examine panel status + xtdescribe + + * distributions for valid obs + * Gcons + local vars "Econs Gcons" + foreach v of local vars { + di "***************" + di "* Testing `v' for `sample'% sample" + + * overall + xtsum `v' if `v'Valid == "V" + * test values for valid - check for valid 0s for example. This only happens for gas where: + * 100 < gcons < 250 so included but rounded to nearest 500 = 0 + + * elec always rounded to nearest 50 so min should always be 100 + + tabstat `v', by(`v'Valid) s(n mean semean min max) + * by year + di "* check `v' for 0s (`s'% sample)" + table `v' year if `v' < 1000 + table `v'Valid year, c(count `v' min `v' mean `v' max `v') + + if `do_graphs' { + histogram `v' if `v'Valid == "V", by(year) name(histo_`s'pc_`v') + graph export "`rpath'/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace + graph box `v' if `v'Valid == "V", over(year) name(box_`s'pc_`v') + graph export "`rpath'/NEED-EULF-2014-`s'pc-box_`v'_over_year_valid.png", replace + } + + di "* check the panel transitions for each valid" + gen `v'Validr = 1 if `v'Valid == "V" + replace `v'Validr = 2 if `v'Valid == "O" + replace `v'Validr = 3 if `v'Valid == "L" + replace `v'Validr = 4 if `v'Valid == "G" + replace `v'Validr = 5 if `v'Valid == "M" + + lab var `v'Validr "Recoded `v'Valid" + lab val `v'Validr `v'Validr + * di "Check transitions (`v'Validr)" + xttrans `v'Validr, freq + } +} + +di "* Done!" + +log close