* analyse-NEED-EULF-2014-descriptives.do
* Script to analyse DECC's NEED data to:
* examine distributions etc

* NB this script uses 2 data files derived from the original data using the 'process' script

* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518

* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
* b.anderson@soton.ac.uk
* (c) University of Southampton

* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
* http://creativecommons.org/licenses/by-nc/4.0/

* ---------------------------------------------------------------------------
* Setup: clear memory, define paths and run-time toggles, open the log.
* The macros defined here (dpath, rpath, version, do_2012_desc, do_long_desc,
* do_graphs) are used by the analysis sections further down.
* ---------------------------------------------------------------------------
clear all

* close any log left open by an earlier run; capture stops this from being
* an error when no log is open
capture noisily log close

* written for Mac OSX - remember to change filesystem delimiter for other platforms
local home "/Users/ben/Documents"
local proot "`home'/Work/Data/Social Science Datatsets/DECC"
* for clam
* local proot "`home'/Work/NEED"
* dpath = processed input data; rpath = where the log and graphs are written
local dpath "`proot'/NEED/End User Licence File 2014/processed"
local rpath "`proot'/results/NEED"

* version tag - becomes part of the log file name
local version "v1.1"

* quick tests for 2012
local do_2012_desc = 1

* tests for all years using long file - takes a while
local do_long_desc = 1

* toggle graph drawing
local do_graphs = 0

set more off

* one log per version and run date; replace overwrites a log from the same day
log using "`rpath'/analyse-NEED-EULF-2014-descriptives-`version'-$S_DATE.smcl", replace

* ---------------------------------------------------------------------------
* 2012 descriptives (quick test on a subsample of the wide file).
* Runs only when do_2012_desc is non-zero. Reads dpath, rpath and do_graphs
* from the setup section.
* ---------------------------------------------------------------------------
if `do_2012_desc' {
	* use a subsample for speed
	local sample = "20pc"
	* first use the wide file for basic descriptives
	use "`dpath'/need_eul_may2014_consumptionfile_wide_`sample'.dta", clear
	* match in the xwave file with the vars we want
	merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile_`sample'", keepusing(EE_BAND FLOOR_AREA_BAND PROP_AGE)

	* distributions for 2012 (to test)
	* processor intensive
	local vars "Econs2012 Gcons2012"
	local tvars "EE_BAND FLOOR_AREA_BAND PROP_AGE"

	* test values for valid - check for valid 0s for example. This only happens for gas where:
	* 100 < gcons < 250 so included but rounded to nearest 500 = 0
	* elec always rounded to nearest 50 so min should always be 100
	foreach v of local vars {
		* summary split by the validity flag for this consumption variable
		tabstat `v', by(`v'Valid) s(n mean semean min max)

		* low-value check (looking for 0s). It does not depend on the
		* grouping variable so it is run once per consumption variable
		* rather than once per grouping variable
		tab `v' if `v' < 1000

		foreach tv of local tvars {
			di "***************"
			di "* Testing `tv'"
			* valid observations only, summarised within each level of the grouping variable
			tabstat `v' if `v'Valid == "V", by(`tv') s(n mean semean min max)

			if `do_graphs' {
				* name(..., replace) lets the section be re-run in the same
				* session without a graph-already-exists error
				histogram `v' if `v'Valid == "V", by(`tv') name(h_`tv'_`v'_`sample', replace)
				graph export "`rpath'/NEED-EULF-2014-histo_`v'_by_`tv'_`sample'_valid.png", replace

				graph box `v' if `v'Valid == "V", over(`tv') name(b_`tv'_`v'_`sample', replace)
				graph export "`rpath'/NEED-EULF-2014-box_`v'_by_`tv'_`sample'_valid.png", replace
			}
		}
	}
}

* ---------------------------------------------------------------------------
* All-years descriptives using the long-form consumption file.
* Runs only when do_long_desc is non-zero. Reads dpath, rpath and do_graphs
* from the setup section. Repeated for each sample size as a check.
* ---------------------------------------------------------------------------
if `do_long_desc' {
	* Now use the pre-processed long form file which contains all years of consumption data but not the constant values (housing characteristics etc) which are in the xwave file
	* do this for each random sample of differing sizes as a check
	local samples "10 20 30 40 50 100"
	* consumption variables to describe - the same for every sample
	local vars "Econs Gcons"

	foreach s of local samples {
		di "************************"
		di "* Using `s'% sample"
		use "`dpath'/need_eul_may2014_consumptionfile_long_`s'pc.dta", clear

		* set as panel in case it wasn't
		xtset HH_ID year

		* examine panel status
		xtdescribe

		* distributions for valid obs
		foreach v of local vars {
			* overall
			xtsum `v' if `v'Valid == "V"

			* test values for valid - check for valid 0s for example. This only happens for gas where:
			* 100 < gcons < 250 so included but rounded to nearest 500 = 0
			* elec always rounded to nearest 50 so min should always be 100
			tabstat `v', by(`v'Valid) s(n mean semean min max)

			* by year
			* check for 0s
			table `v' year if `v' < 1000
			* NOTE(review): c() is the contents option of the pre-Stata 17
			* table command. Stata 17 and later need version control or the
			* newer statistic() syntax - confirm which release this runs under
			table `v'Valid year, c(count `v' min `v' mean `v' max `v')

			if `do_graphs' {
				* name(..., replace) lets the section be re-run in the same
				* session without a graph-already-exists error
				histogram `v' if `v'Valid == "V", by(year) name(histo_`s'pc_`v', replace)
				graph export "`rpath'/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace
				graph box `v' if `v'Valid == "V", over(year) name(box_`s'pc_`v', replace)
				graph export "`rpath'/NEED-EULF-2014-`s'pc-box_`v'_by_year_valid.png", replace
			}
		}

		* check the years where gas = valid but consumption = 0
		* Presumably in these cases gas > 100 but < 249 ?
	}
}

* signal completion in the log, then close it
display "* Done!"

log close