diff --git a/NEED/analyse-NEED-EULF-2014-electricity-consumption.do b/NEED/analyse-NEED-EULF-2014-electricity-consumption.do index 3c966696dc0e9e27b2cbfac1025ff1f8be44ba6c..1e05ac6ecf2bb649b3cb64284936c2ff7fe8f917 100644 --- a/NEED/analyse-NEED-EULF-2014-electricity-consumption.do +++ b/NEED/analyse-NEED-EULF-2014-electricity-consumption.do @@ -1,6 +1,6 @@ ******************************************* * Script to: -* - analyse DECC's EULF 2014 NEED data to examine distributions etc +* - analyse DECC's EULF 2014 NEED data to examine distributions etc * Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014 * http://discover.ukdataservice.ac.uk/catalogue/?sn=7518 @@ -9,16 +9,16 @@ * The script requires the following to have been run first: * https://github.com/dataknut/DECC-data/blob/master/NEED/process-NEED-EULF-2014.do -/* +/* Copyright (C) 2014 University of Southampton -Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) +Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) [Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton] This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License +the Free Software Foundation; either version 2 of the License (http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version. 
This program is distributed in the hope that it will be useful, @@ -39,8 +39,8 @@ set more off * written for Mac OSX - remember to change filesystem delimiter for other platforms global home "/Users/ben/Documents" -local dpath "$home/Work/Data/Social Science Datatsets/DECC/NEED/End User Licence File 2014/processed" -local rpath "$home/Work/Papers and Conferences/RSS-2015/results" +global dpath "$home/Work/Data/Social Science Datatsets/DECC/NEED/End User Licence File 2014/processed" +global rpath "$home/Work/Papers and Conferences/RSS-2015/results" local version "v1" * set sample @@ -59,28 +59,80 @@ lab def GconsValidr 1 "(V)alid" 2 "(O)ff-gas" 3 "(L)Gas < 100" 4 "(G) Gas > 50,0 * NB DECC look up table says max elec = 50,000 lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,000" 5 "M(issing in source)" + * also be aware that the consumption is rounded in buckets: /* -GconsYEAR . Missing, off gas or invalid consumption 100 � 7,999 Gas consumption kWh rounded to nearest 500 kWh 8,000- 15,999 Gas consumption kWh rounded to nearest 100 kWh 16,000 � 24,999 Gas consumption kWh rounded to nearest 500 kWh 25,000 � 34,999 Gas consumption kWh rounded to nearest 1,000 kWh 35,000 � 50,000 Gas consumption kWh rounded to nearest 5,000 kWh EconsYEAR . Missing or invalid consumption 100 - 9,999 Electricity consumption kWh rounded to nearest 50 kWh 10,000 - 11,999 Electricity consumption kWh rounded to nearest 100 kWh 12,000 - 14,999 Electricity consumption kWh rounded to nearest 500 kWh 15,000 - 19,999 Electricity consumption kWh rounded to nearest 1,000 kWh 20,000 - 25,000 Electricity consumption kWh rounded to nearest 5,000 kWh +GconsYEAR . 
Missing, off gas or invalid consumption 100 - 7,999 Gas consumption kWh rounded to nearest 500 kWh 8,000 - 15,999 Gas consumption kWh rounded to nearest 100 kWh 16,000 - 24,999 Gas consumption kWh rounded to nearest 500 kWh 25,000 - 34,999 Gas consumption kWh rounded to nearest 1,000 kWh 35,000 - 50,000 Gas consumption kWh rounded to nearest 5,000 kWh EconsYEAR . Missing or invalid consumption 100 - 9,999 Electricity consumption kWh rounded to nearest 50 kWh 10,000 - 11,999 Electricity consumption kWh rounded to nearest 100 kWh 12,000 - 14,999 Electricity consumption kWh rounded to nearest 500 kWh 15,000 - 19,999 Electricity consumption kWh rounded to nearest 1,000 kWh 20,000 - 25,000 Electricity consumption kWh rounded to nearest 5,000 kWh set more off */ -log using "`rpath'/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace +log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace if `do_desc' { di "************************" di "* Using `sample'% sample" - use "`dpath'/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear + * load the yearly consumption data + use "$dpath/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear + + * merge in the xwave file (fixed data - we assume!) 
+ merge m:1 HH_ID using "$dpath/need_eul_may2014_xwavefile_100pc.dta" * set as panel in case it wasn't - xtset HH_ID year + * fix format of year so xtset doesn't break + format year %ty + xtset HH_ID year, delta(1 year) * examine panel status - xtdescribe - - * distributions for valid obs - * Gcons - local vars "Econs Gcons" + xtdescribe + + * set up +local vars "Econs Gcons" +foreach v of local vars { + di "***************" + di "* Testing `v' for `sample'% sample" + + di "* check the panel transitions for each valid" + gen `v'Validr = 1 if `v'Valid == "V" + replace `v'Validr = 2 if `v'Valid == "O" // off gas (from EPC) only relevant for gas + replace `v'Validr = 3 if `v'Valid == "L" + replace `v'Validr = 4 if `v'Valid == "G" + replace `v'Validr = 5 if `v'Valid == "M" + + lab var `v'Validr "Recoded `v'Valid" + lab val `v'Validr `v'Validr + + * set up consumption deciles + levelsof(year), local(levels) + foreach l of local levels { + di "* Calculating consumption deciles for `v' for `l'" + * creates missing for other years have to do this as egen does not allow by + egen `v'_dec_`l' = cut(`v') if year == `l', group(10) + } + * now combine them - set missing option otherwise it counts a row where all are missing as 0 + egen `v'_dec = rowtotal(`v'_dec_*), missing + * remove temporary ones + drop `v'_dec_* + * check + tab `v'_dec year +} + +stop + * flag dwellings which are off gas for electricity + * NB - in this dataset we don't know if they use electricity as main heat (could be oil) + gen ba_off_gas = 0 + replace ba_off_gas = 1 if GconsValidr == 2 + lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)" + lab val ba_off_gas ba_off_gas + * check + tabstat Gcons Econs, by(ba_off_gas) + di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'" + + tab ba_off_gas MAIN_HEAT_FUEL, mi // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'? 
+ table year MAIN_HEAT_FUEL, by(ba_off_gas) + * roughly constant rate throughout years + table year MAIN_HEAT_FUEL, by(ba_off_gas) c(mean Gcons n Gcons) + * but off gas have no gas readings as you'd expect (DECC filter) + foreach v of local vars { di "***************" di "* Testing `v' for `sample'% sample" @@ -91,30 +143,32 @@ if `do_desc' { * 100 < gcons < 250 so included but rounded to nearest 500 = 0 * elec always rounded to nearest 50 so min should always be 100 - - tabstat `v', by(`v'Valid) s(n mean semean min max) + + tabstat `v', by(`v'Valid) s(n mean semean min max) * by year di "* check `v' for 0s (`s'% sample)" table `v' year if `v' < 1000 table `v'Valid year, c(count `v' min `v' mean `v' max `v') - + if `do_graphs' { - histogram `v' if `v'Valid == "V", by(year) name(histo_`s'pc_`v') - graph export "`rpath'/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace - graph box `v' if `v'Valid == "V", over(year) name(box_`s'pc_`v') - graph export "`rpath'/NEED-EULF-2014-`s'pc-box_`v'_over_year_valid.png", replace + di "* Running graphs - do not keep in memory, just save out" + di "* Running graphs: histo" + histogram `v' if `v'Valid == "V", by(year) + graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace + + di "* Running graphs: boxes" + graph box `v' if `v'Valid == "V", over(year) + graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_over_year_valid.png", replace + + graph box `v' if `v'Valid == "V", over(year) by(FLOOR_AREA_BAND) + graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_floor_valid.png", replace + + graph box `v' if `v'Valid == "V", over(year) by(EE_BAND) + graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_ee_valid.png", replace + } - - di "* check the panel transitions for each valid" - gen `v'Validr = 1 if `v'Valid == "V" - replace `v'Validr = 2 if `v'Valid == "O" - replace `v'Validr = 3 if `v'Valid == "L" - replace `v'Validr = 4 if `v'Valid == "G" - replace `v'Validr = 5 if 
`v'Valid == "M" - - lab var `v'Validr "Recoded `v'Valid" - lab val `v'Validr `v'Validr - * di "Check transitions (`v'Validr)" + + di "* Check transitions (`v'Validr)" xttrans `v'Validr, freq } }