* Skip to content
* Snippets Groups Projects
* analyse-NEED-EULF-2014-electricity-consumption.do 4.62 KiB
*******************************************
* Script to:
* - analyse DECC's EULF 2014 NEED data to examine distributions etc 

* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518

* Most recent version of this script can be found at https://github.com/dataknut/DECC-data/tree/master/NEED
* The script requires the following to have been run first:
* https://github.com/dataknut/DECC-data/blob/master/NEED/process-NEED-EULF-2014.do

/*   

Copyright (C) 2014  University of Southampton

Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License 
(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

#YMMV - http://en.wiktionary.org/wiki/YMMV

*/

* ---- session reset ----
* start from an empty session and close any log left open by an earlier run
clear all
capture noisily log close
* never pause output waiting for a key press
set more off

* ---- locations ----
* paths use Mac OSX "/" separators - change the filesystem delimiter on other platforms
global home "/Users/ben/Documents"

local rpath "$home/Work/Papers and Conferences/RSS-2015/results"
local dpath "$home/Work/Data/Social Science Datatsets/DECC/NEED/End User Licence File 2014/processed"

* ---- run settings ----
* version tag for this run
local version "v1"
* sample to analyse, as a percentage (100 = 100pc, etc)
local sample "100"

* switches controlling what happens (1 = on, 0 = off)
* descriptive analysis
local do_desc 1
* graph drawing
local do_graphs 1

* value labels for the recoded validity flags - ref DECC look up table
label define GconsValidr ///
	1 "(V)alid" ///
	2 "(O)ff-gas" ///
	3 "(L)Gas < 100" ///
	4 "(G) Gas > 50,000" ///
	5 "M(issing in source)"

* NB the DECC look up table gives the electricity maximum as 50,000
* (the label below uses 25,000)
label define EconsValidr ///
	1 "(V)alid" ///
	2 "not set" ///
	3 "(L)Elec < 100" ///
	4 "(G) Elec > 25,000" ///
	5 "M(issing in source)"

* also be aware that the consumption is rounded in buckets:
/*
GconsYEAR	.	Missing, off gas or invalid consumption
	100 - 7,999	Gas consumption kWh rounded to nearest 500 kWh
	8,000 - 15,999	Gas consumption kWh rounded to nearest 100 kWh
	16,000 - 24,999	Gas consumption kWh rounded to nearest 500 kWh
	25,000 - 34,999	Gas consumption kWh rounded to nearest 1,000 kWh
	35,000 - 50,000	Gas consumption kWh rounded to nearest 5,000 kWh
		
		
EconsYEAR	.	Missing or invalid consumption
	100 - 9,999	Electricity consumption kWh rounded to nearest 50 kWh
	10,000 - 11,999	Electricity consumption kWh rounded to nearest 100 kWh
	12,000 - 14,999	Electricity consumption kWh rounded to nearest 500 kWh
	15,000 - 19,999	Electricity consumption kWh rounded to nearest 1,000 kWh
	20,000 - 25,000	Electricity consumption kWh rounded to nearest 5,000 kWh
set more off
*/

* open the results log, replacing any existing log of the same name
log using "`rpath'/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace

* Descriptive analysis of the consumption variables, run only if do_desc is set.
* For each of Econs and Gcons this reports panel summaries, tabulations by
* validity flag and year, optional histograms/box plots (if do_graphs is set)
* and the year-to-year transitions of the recoded validity flag.
if `do_desc' {
	di "************************"
	di "* Using `sample'% sample"
	use "`dpath'/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear

	* (re)define the value labels now that the data are loaded: value labels belong
	* to the dataset in memory, so any defined before the -use, clear- above are
	* discarded when the file is read and would otherwise never be displayed
	* ref DECC look up table
	lab def GconsValidr 1 "(V)alid" 2 "(O)ff-gas" 3 "(L)Gas < 100" 4 "(G) Gas > 50,000" 5 "M(issing in source)", replace
	* NB DECC look up table says max elec = 50,000
	lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,000" 5 "M(issing in source)", replace

	* set as panel in case it wasn't
	xtset HH_ID year

	* examine panel status
	xtdescribe 
	
	* distributions for valid obs, for electricity and then gas
	local vars "Econs Gcons"
	foreach v of local vars {
		di "***************"
		di "* Testing `v' for `sample'% sample"

		* overall, valid observations only ("V" flag)
		xtsum `v' if `v'Valid == "V"
		* test values for valid - check for valid 0s for example. This only happens for gas where:
		* 100 < gcons < 250 so included but rounded to nearest 500 = 0

		* elec always rounded to nearest 50 so min should always be 100
		
		tabstat `v', by(`v'Valid) s(n mean semean min max) 
		* by year
		* (sample macro is `sample'; the previously used `s' was never defined and expanded to nothing)
		di "* check `v' for 0s (`sample'% sample)"
		table `v' year if `v' < 1000
		* NOTE(review): c() is the older -table- syntax - confirm the Stata version this is run under
		table `v'Valid year, c(count `v' min `v' mean `v' max `v')
		 
		if `do_graphs' {
			* graph names and export file names carry the sample percentage
			histogram `v' if `v'Valid == "V", by(year) name(histo_`sample'pc_`v')
			graph export "`rpath'/NEED-EULF-2014-`sample'pc-histo_`v'_by_year_valid.png", replace 
			graph box `v' if `v'Valid == "V", over(year) name(box_`sample'pc_`v')
			graph export "`rpath'/NEED-EULF-2014-`sample'pc-box_`v'_over_year_valid.png", replace 
		}
		
		di "* check the panel transitions for each valid"
		* recode the string validity flag to a numeric code so it can be used with xttrans
		gen `v'Validr = 1 if `v'Valid == "V"
		replace `v'Validr = 2 if `v'Valid == "O"
		replace `v'Validr = 3 if `v'Valid == "L"
		replace `v'Validr = 4 if `v'Valid == "G"
		replace `v'Validr = 5 if `v'Valid == "M"
		
		lab var `v'Validr "Recoded `v'Valid"
		* attach the value label of the same name defined after the data load above
		lab val `v'Validr `v'Validr
		* di "Check transitions (`v'Validr)"
		xttrans `v'Validr, freq
	}
}

di "* Done!"

log close