Skip to content
Snippets Groups Projects
Commit 7dd7b759 authored by Ben Anderson's avatar Ben Anderson
Browse files

fixed header typos

parent 36611cf4
No related branches found
No related tags found
No related merge requests found
*******************************************
* Script to:
*******************************************
* Script to:
* - Extract data from DECC LSOA level domestic energy consumption data (https://www.gov.uk/government/collections/mlsoa-and-llsoa-electricity-and-gas-estimates)
/*
Copyright (C) 2014 University of Southampton
Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut)
[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License
(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
#YMMV - http://en.wiktionary.org/wiki/YMMV
*/
/*
Copyright (C) 2014 University of Southampton
Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut)
[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License
(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
#YMMV - http://en.wiktionary.org/wiki/YMMV
*/
clear all
......@@ -29,7 +29,7 @@ capture log close
set more off
local where = "/Users/ben/Documents"
local where = "~/Documents"
local projroot = "`where'/Work/Data/Social Science Datatsets/DECC"
local dpath = "`projroot'/LSOA Energy Data"
......@@ -37,7 +37,7 @@ local dofiles = "`projroot'/do_files"
local rpath = "`dpath'/processed"
* census data location
local c2001path = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data"
local c2001path = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data"
* NOTE(review): c2011path is set to the same "2001Data" folder as c2001path - possibly a copy-paste slip; confirm the intended 2011 census folder name
local c2011path = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data"
local eimdpath = "`where'/Work/Data/Social Science Datatsets/Indices of Deprivation/English ID 2010"
local pcluts = "`where'/Work/Data/GIS data/UK Postcodes/pcluts_2007nov_processed"
......@@ -52,7 +52,7 @@ local do_2008 = 0
local do_2009 = 0
local do_2010 = 0
local do_matrix_graphs = 0
local do_bar_graphs = 1
local do_bar_graphs = 0
......@@ -330,14 +330,18 @@ foreach dv of local dist_testvars {
tabstat *Meters_*_delptc_ppn , by(`dv')
}
gen lsoacode_2001 = ew_lsoacode
preserve
keep zonecode regioncode regionname countycode countyname districtcode districtname *200* *dif*
keep zonecode lsoacode_2001 regioncode regionname countycode countyname districtcode districtname *200* *dif*
outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo.csv", comma replace
outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo-southwest.csv" if regionname == "South West", comma replace
export excel using "`dpath'/processed/llsoa-electricity-all-years-England-geo.xls", sheet("data") firstrow(variables) replace
restore
save "`dpath'/processed/llsoa-electricity-all-years-England-geo.dta", replace
......
......@@ -32,7 +32,7 @@ clear all
capture noisily log close
* written for Mac OSX - remember to change filesystem delimiter for other platforms
local home "/Users/ben/Documents"
local home "~/Documents"
local proot "`home'/Work/Data/Social Science Datatsets/DECC"
* for clam
* local proot "`home'/Work/NEED"
......
......@@ -37,7 +37,7 @@ capture noisily log close
set more off
* written for Mac OSX - remember to change filesystem delimiter for other platforms
global home "/Users/ben/Documents"
global home "~/Documents"
global dpath "$home/Work/Data/Social Science Datatsets/DECC/NEED/End User Licence File 2014/processed"
global rpath "$home/Work/Papers and Conferences/RSS-2015/results"
......@@ -62,30 +62,46 @@ lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,0
* also be aware that the consumption is rounded in buckets:
/*
GconsYEAR . Missing, off gas or invalid consumption 100 - 7,999 Gas consumption kWh rounded to nearest 500 kWh 8,000 - 15,999 Gas consumption kWh rounded to nearest 100 kWh 16,000 - 24,999 Gas consumption kWh rounded to nearest 500 kWh 25,000 - 34,999 Gas consumption kWh rounded to nearest 1,000 kWh 35,000 - 50,000 Gas consumption kWh rounded to nearest 5,000 kWh EconsYEAR . Missing or invalid consumption 100 - 9,999 Electricity consumption kWh rounded to nearest 50 kWh 10,000 - 11,999 Electricity consumption kWh rounded to nearest 100 kWh 12,000 - 14,999 Electricity consumption kWh rounded to nearest 500 kWh 15,000 - 19,999 Electricity consumption kWh rounded to nearest 1,000 kWh 20,000 - 25,000 Electricity consumption kWh rounded to nearest 5,000 kWh
GconsYEAR . Missing, off gas or invalid consumption
100 - 7,999 Gas consumption kWh rounded to nearest 500 kWh
8,000 - 15,999 Gas consumption kWh rounded to nearest 100 kWh
16,000 - 24,999 Gas consumption kWh rounded to nearest 500 kWh
25,000 - 34,999 Gas consumption kWh rounded to nearest 1,000 kWh
35,000 - 50,000 Gas consumption kWh rounded to nearest 5,000 kWh
EconsYEAR . Missing or invalid consumption
100 - 9,999 Electricity consumption kWh rounded to nearest 50 kWh
10,000 - 11,999 Electricity consumption kWh rounded to nearest 100 kWh
12,000 - 14,999 Electricity consumption kWh rounded to nearest 500 kWh
15,000 - 19,999 Electricity consumption kWh rounded to nearest 1,000 kWh
20,000 - 25,000 Electricity consumption kWh rounded to nearest 5,000 kWh
set more off
*/
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace
if `do_desc' {
di "************************"
di "* Using `sample'% sample"
* load the yearly consumption data
use "$dpath/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear
* merge in the xwave file (fixed data - we assume!)
merge m:1 HH_ID using "$dpath/need_eul_may2014_xwavefile_100pc.dta"
di "************************"
di "* Using `sample'% sample"
* load the yearly consumption data
use "$dpath/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear
* set as panel in case it wasn't
* fix format of year so xtset doesn't break
format year %ty
xtset HH_ID year, delta(1 year)
* merge in the xwave file (fixed data - we assume!)
merge m:1 HH_ID using "$dpath/need_eul_may2014_xwavefile_100pc.dta"
* examine panel status
xtdescribe
lab var Econs "Electricity (KwH/year)"
lab var Gcons "Gas (KwH/year)"
* set up
* set as panel in case it wasn't
* fix format of year so xtset doesn't break
format year %ty
xtset HH_ID year, delta(1 year)
* examine panel status
xtdescribe
* set up
local vars "Econs Gcons"
foreach v of local vars {
di "***************"
......@@ -101,6 +117,9 @@ foreach v of local vars {
lab var `v'Validr "Recoded `v'Valid"
lab val `v'Validr `v'Validr
di "* Check transitions (`v'Validr)"
xttrans `v'Validr, freq
* set up consumption deciles
levelsof(year), local(levels)
foreach l of local levels {
......@@ -116,60 +135,57 @@ foreach v of local vars {
tab `v'_dec year
}
stop
* flag dwellings which are off gas for electricity
* NB - in this dataset we don't know if they use electricity as main heat (could be oil)
gen ba_off_gas = 0
replace ba_off_gas = 1 if GconsValidr == 2
lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)"
lab val ba_off_gas ba_off_gas
* check
tabstat Gcons Econs, by(ba_off_gas)
di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
* flag dwellings which are off gas for electricity
* NB - in this dataset we don't know if they use electricity as main heat (could be oil)
gen ba_off_gas = 0
replace ba_off_gas = 1 if GconsValidr == 2
lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)"
lab val ba_off_gas ba_off_gas
tab ba_off_gas MAIN_HEAT_FUEL, mi // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
table year MAIN_HEAT_FUEL, by(ba_off_gas)
* roughly constant rate throughout years
table year MAIN_HEAT_FUEL, by(ba_off_gas) c(mean Gcons n Gcons)
* but off gas have no gas readings as you'd expect (DECC filter)
* check
tabstat Gcons Econs, by(ba_off_gas)
di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
foreach v of local vars {
di "***************"
di "* Testing `v' for `sample'% sample"
tab ba_off_gas MAIN_HEAT_FUEL, mi // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
* overall
xtsum `v' if `v'Valid == "V"
* test values for valid - check for valid 0s for example. This only happens for gas where:
* 100 < gcons < 250 so included but rounded to nearest 500 = 0
table year MAIN_HEAT_FUEL, by(ba_off_gas)
* roughly constant rate throughout years
table year MAIN_HEAT_FUEL, by(ba_off_gas) c(mean Gcons n Gcons)
* but off gas have no gas readings as you'd expect (DECC filter)
* elec always rounded to nearest 50 so min should always be 100
foreach v of local vars {
di "***************"
di "* Testing `v' for `sample'% sample"
tabstat `v', by(`v'Valid) s(n mean semean min max)
* by year
di "* check `v' for 0s (`s'% sample)"
table `v' year if `v' < 1000
table `v'Valid year, c(count `v' min `v' mean `v' max `v')
* overall
xtsum `v' if `v'Valid == "V"
* test values for valid - check for valid 0s for example. This only happens for gas where:
* 100 < gcons < 250 so included but rounded to nearest 500 = 0
if `do_graphs' {
di "* Running graphs - do not keep in memory, just save out"
di "* Running graphs: histo"
histogram `v' if `v'Valid == "V", by(year)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace
* elec always rounded to nearest 50 so min should always be 100
di "* Running graphs: boxes"
graph box `v' if `v'Valid == "V", over(year)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_over_year_valid.png", replace
tabstat `v', by(`v'Valid) s(n mean semean min max)
* by year
di "* check `v' for 0s (`s'% sample)"
table `v' year if `v' < 1000
table `v'Valid year, c(count `v' min `v' mean `v' max `v')
graph box `v' if `v'Valid == "V", over(year) by(FLOOR_AREA_BAND)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_floor_valid.png", replace
if `do_graphs' {
di "* Running graphs - do not keep in memory, just save out"
di "* Running graphs: histo"
histogram `v' if `v'Valid == "V", by(year) scale(0.75)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace
graph box `v' if `v'Valid == "V", over(year) by(EE_BAND)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_ee_valid.png", replace
di "* Running graphs: boxes"
graph box `v' if `v'Valid == "V", over(year) scale(0.75)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_over_year_valid.png", replace
}
graph box `v' if `v'Valid == "V", over(year) by(FLOOR_AREA_BAND) scale(0.75)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_floor_valid.png", replace
graph box `v' if `v'Valid == "V", over(year) by(EE_BAND) scale(0.75)
graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_ee_valid.png", replace
di "* Check transitions (`v'Validr)"
xttrans `v'Validr, freq
}
}
......
......@@ -32,7 +32,7 @@ clear all
capture noisily log close
* written for Mac OSX - remember to change filesystem delimiter for other platforms
local home "/Users/ben/Documents"
local home "~/Documents"
local proot "`home'/Work/Data/Social Science Datatsets/DECC"
* for clam
* local proot "`home'/Work/NEED"
......
......@@ -52,7 +52,7 @@ clear all
capture noisily log close _all
* written for Mac OSX - remember to change filesystem delimiter for other platforms
local home "/Users/ben/Documents"
local home "~/Documents"
local proot "`home'/Work/Data/Social Science Datatsets/DECC"
* for clam
* local proot "`home'/Work/NEED"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment