analyse-NEED-EULF-2014-descriptives.do 5.22 KB
Newer Older
1
2
* Script to:
* - analyse DECC's EULF 2014 NEED data to examine distributions etc 
3
4
5
6

* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
* NB this script uses 2 data files derived from the original data using the 'process' script

/*   

Copyright (C) 2014  University of Southampton

Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License 
(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

#YMMV - http://en.wiktionary.org/wiki/YMMV
27

28
*/
29
30
31
32
33
34

* ------------------------------------------------------------------
* Session setup: empty the session, define paths and run-time toggles,
* then open the results log.
* ------------------------------------------------------------------
clear all

* close any log left open by an earlier run (reports, but does not stop on, an error)
capture noisily log close

* do not pause long output
set more off

* written for Mac OSX - remember to change filesystem delimiter for other platforms
local home "~/Documents"
* NOTE(review): the spelling "Datatsets" is kept exactly as found - it has to match the folder name on disk
local proot "`home'/Work/Data/Social Science Datatsets/DECC"
* for clam
* local proot "`home'/Work/NEED"

* processed input data and results locations
local dpath "`proot'/NEED/End User Licence File 2014/processed"
local rpath "`proot'/results/NEED"

* version tag used in the log file name
local version "v1.1"

* set sample
local sample "100pc"

* run-time toggles (1 = run, 0 = skip)
* quick tests for 2012
local do_2012_desc 0
* tests for all years using long file - takes a while
local do_long_desc 1
* toggle graph drawing
local do_graphs 0

log using "`rpath'/analyse-NEED-EULF-2014-descriptives-`version'-$S_DATE.smcl", replace

Ben Anderson's avatar
Ben Anderson committed
59
60
61
62
63
64
65
if `do_2012_desc' {
	* Quick descriptives for 2012 only, using the wide consumption file.
	* Relies on locals dpath, rpath and do_graphs being set earlier in this do-file.

	* use a subsample for speed (this overrides any earlier value of local sample)
	local sample = "20pc"

	* first use the wide file for basic descriptives
	use "`dpath'/need_eul_may2014_consumptionfile_wide_`sample'.dta", clear

	* match in the xwave file, keeping only the vars we want
	merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile_`sample'", keepusing(EE_BAND FLOOR_AREA_BAND PROP_AGE)

	* distributions for 2012 (to test)
	* processor intensive
	local vars "Econs2012 Gcons2012"
	local tvars "EE_BAND FLOOR_AREA_BAND PROP_AGE"

	* test values for valid - check for valid 0s for example. This only happens for gas where:
	* 100 < gcons < 250 so included but rounded to nearest 500 = 0
	* elec always rounded to nearest 50 so min should always be 100
	foreach v of local vars {
		* all observations, split by the validity flag
		tabstat `v', by(`v'Valid) statistics(n mean semean min max)

		foreach tv of local tvars {
			display "***************"
			* FIX: this banner used to expand local s, which is never defined in this
			* branch, so it printed an empty sample size. Local sample holds the
			* subsample tag that is actually loaded above.
			display "* Testing `v' by `tv' for `sample' sample"

			* test values for `tv' - check for 0s for example (valid observations only)
			tabstat `v' if `v'Valid == "V", by(`tv') statistics(n mean semean min max)
			tabulate `v' if `v' < 1000

			if `do_graphs' {
				histogram `v' if `v'Valid == "V", by(`tv') name(h_`tv'_`v'_`sample')
				* NOTE(review): the histogram is exported to a graphs sub-folder of rpath but
				* the box plot below goes straight into rpath - confirm which is intended
				graph export "`rpath'/graphs/NEED-EULF-2014-histo_`v'_by_`tv'_`sample'_valid.png", replace

				graph box `v' if `v'Valid == "V", over(`tv') name(b_`tv'_`v'_`sample')
				graph export "`rpath'/NEED-EULF-2014-box_`v'_by_`tv'_`sample'_valid.png", replace
			}
		}
	}
}
94

Ben Anderson's avatar
Ben Anderson committed
95
96
if `do_long_desc' {
	* Descriptives for all years, using the pre-processed long form file. That file
	* holds every year of consumption data but not the constant values (housing
	* characteristics etc), which are in the xwave file.
	* Relies on locals dpath, rpath and do_graphs being set earlier in this do-file.

	* do this for each random sample of differing sizes as a check
	* local samples "10 20 30 40 50 100"
	local samples "10"

	foreach s of local samples {
		display "************************"
		display "* Using `s'% sample"
		use "`dpath'/need_eul_may2014_consumptionfile_long_`s'pc.dta", clear

		* set as panel in case it wasn't
		xtset HH_ID year

		* examine panel status
		xtdescribe

		* distributions for valid obs
		foreach v in Econs Gcons {
			display "***************"
			display "* Testing `v' for `s'% sample"

			* overall
			xtsum `v' if `v'Valid == "V"

			* test values for valid - check for valid 0s for example. This only happens for gas where:
			* 100 < gcons < 250 so included but rounded to nearest 500 = 0
			* elec always rounded to nearest 50 so min should always be 100
			tabstat `v', by(`v'Valid) statistics(n mean semean min max)

			* by year
			display "* check `v' for 0s (`s'% sample)"
			table `v' year if `v' < 1000
			table `v'Valid year, contents(count `v' min `v' mean `v' max `v')

			if `do_graphs' {
				* histogram and box plot of valid observations by year, each saved as png
				histogram `v' if `v'Valid == "V", by(year) name(histo_`s'pc_`v')
				graph export "`rpath'/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace
				graph box `v' if `v'Valid == "V", over(year) name(box_`s'pc_`v')
				graph export "`rpath'/NEED-EULF-2014-`s'pc-box_`v'_by_year_valid.png", replace
			}

			* check the panel transitions for each valid:
			* recode the string validity flag to a labelled numeric variable first
			* (any flag other than V, O, L, G or M is left missing)
			generate `v'Validr = 1 if `v'Valid == "V"
			replace `v'Validr = 2 if `v'Valid == "O"
			replace `v'Validr = 3 if `v'Valid == "L"
			replace `v'Validr = 4 if `v'Valid == "G"
			replace `v'Validr = 5 if `v'Valid == "M"

			label variable `v'Validr "Recoded `v'Valid"
			label define `v'Validr ///
				1 "(V)alid" ///
				2 "(O)ff-gas" ///
				3 "(L)Gas < 100" ///
				4 "(G) Gas > 50,000" ///
				5 "M(issing in source)"
			label values `v'Validr `v'Validr

			* di "Check transitions (`v'Validr)"
			xttrans `v'Validr, freq
		}
	}
}
150
151
152
153

* signal completion in the log, then close it
display "* Done!"

log close