******************************************* * Script to: * - Extract data from DECC LSOA level domestic energy consumption data (https://www.gov.uk/government/collections/mlsoa-and-llsoa-electricity-and-gas-estimates) /* Copyright (C) 2014 University of Southampton Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) [Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton] This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License (http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. #YMMV - http://en.wiktionary.org/wiki/YMMV */ clear all capture log close set more off local where = "~/Documents" local projroot = "`where'/Work/Data/Social Science Datatsets/DECC" local dpath = "`projroot'/LSOA Energy Data" local dofiles = "`projroot'/do_files" local rpath = "`dpath'/processed" * census data location local c2001path = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data" local c2011path = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data" local eimdpath = "`where'/Work/Data/Social Science Datatsets/Indices of Deprivation/English ID 2010" local pcluts = "`where'/Work/Data/GIS data/UK Postcodes/pcluts_2007nov_processed" local urbpath = "`where'/Work/Data/Social Science Datatsets/UK Urban Rural Classification" log using "`rpath'/extract-DECC-LSOA-energy-data-from-excel-$S_DATE.smcl", replace local years "2007 2008 2009 2010 2011" * set to 1 to get set-up etc to run local do_2007 = 0 local do_2008 = 0 local do_2009 = 0 local do_2010 = 0 local do_matrix_graphs = 0 local do_bar_graphs = 0 * 2007 = experimental pilots local dist_testvars "morphologyname imd2010_dec" * 2008 if `do_2008' { * 2008 - data in GOR tab format!! local y = "2008" local gor1 "North West" local gor2 "West Midlands" local gor3 "East Midlands" local gor4 "North East" local gor5 "Yorkshire and Humber" local gor6 "East of England" local gor7 "Greater London" local gor8 "South East" local gor9 "South West" local gor10 "Wales" foreach g of numlist 1/10 { di "* Importing GOR `g'" capture noisily import excel "`dpath'/`y'/790-llsoa-electricity-`y'-england-wales.xls", sheet("`gor`g''") cellrange(A13) allstring firstrow clear save "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta", replace } * start with first use "`dpath'/`y'/790-llsoa-electricity-`y'-gor1.dta", clear * append the others foreach g of numlist 2/10 { append using "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta" } destring Ordinarydomesticconsumption Economy7consumption Numberofordinarydomesticmete /// Numberofeconomy7meters Averageordinarydomesticconsum Averageeconomy7consumption, replace float force rename LLSOAcode lsoacode_`y' gen zonecode = lsoacode_`y' * duplicates? Caused by disclosure control? duplicates report zonecode duplicates tag zonecode, gen(dups_`y') gen gor_`y' = GOR lab var gor_`y' "Government Office Region (`y' data)" *lab def gor_`y' 1 "North East" 2 "North West" 3 "Yorkshire and Humber" 4 "East Midlands" 5 "West Midlands" 6 "East of England" 7 "Greater London" 8 "South East" 9 "South West" 10 "Wales" *lab val gor_`y' gor_`y' tab gor_`y' dups_`y', mi duplicates drop zonecode, force rename Ordinarydomesticconsumption OrdConsumption_`y' rename Economy7consumption Econ7consumption_`y' rename Numberofordinarydomesticmete NumOrdMeters_`y' rename Numberofeconomy7meters NumEcon7Meters_`y' rename Averageordinarydomesticconsum AvgOrdConsumption_`y' rename Averageeconomy7consumption AvgEcon7consumption_`y' lab var OrdConsumption_`y' "Ordinary domestic consumption `y'" lab var Econ7consumption_`y' "Economy 7 consumption `y'" lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'" lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'" lab var AvgOrdConsumption_`y' "Average domestic consumption `y'" lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'" save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace di "* Done `y'" di "*****************************" } if `do_2009' { * 2009 - data in .csv files local y = "2009" insheet using "`dpath'/`y'/2347-llsoa-domestic-elec-raw.csv", clear rename llsoa lsoacode_`y' gen zonecode = lsoacode_`y' rename consumption_dom OrdConsumption_`y' lab var OrdConsumption_`y' "Ordinary domestic consumption `y'" rename consumption_e7 Econ7consumption_`y' lab var Econ7consumption_`y' "Economy 7 consumption `y'" rename no_mpan_dom NumOrdMeters_`y' lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'" rename no_mpan_e7 NumEcon7Meters_`y' lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'" rename av_consumption_dom AvgOrdConsumption_`y' lab var AvgOrdConsumption_`y' "Average domestic consumption `y'" rename av_consumption_e7 AvgEcon7consumption_`y' lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'" * new for 2009 rename no_dis_dom no_dis_dom_`y' rename no_dis_e7 no_dis_e7_`y' * duplicates? Caused by disclosure control? duplicates report zonecode duplicates tag zonecode, gen(dups_`y') tab la_name dups_`y', mi duplicates drop zonecode, force save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace di "* Done `y'" di "*****************************" } if `do_2010' { * 2010 - data local y = "2010" import excel "`dpath'/`y'/4813-llsoa-domestic-elec-est-`y'-fixed.xls", sheet("LLSOA Electricity Domestic") firstrow clear * insheet using "`dpath'/`y'/4813-llsoa-domestic-elec-est-2010.csv", clear comma rename lsoa lsoacode_`y' gen zonecode = lsoacode_`y' rename Ordinarydomesticconsumption OrdConsumption_`y' lab var OrdConsumption_`y' "Ordinary domestic consumption `y'" rename Economy7consumption Econ7consumption_`y' lab var Econ7consumption_`y' "Economy 7 consumption `y'" rename Numberofordinarydomesticmeters NumOrdMeters_`y' lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'" rename Numberofeconomy7meters NumEcon7Meters_`y' lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'" rename Averageordinarydomesticconsumption AvgOrdConsumption_`y' lab var AvgOrdConsumption_`y' "Average domestic consumption `y'" rename Averageeconomy7consumption AvgEcon7consumption_`y' lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'" * duplicates? Caused by disclosure control? duplicates report zonecode duplicates tag zonecode, gen(dups_`y') tab la_name dups_`y', mi duplicates drop zonecode, force save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace di "* Done `y'" di "*****************************" } * merge the files starting with baseline LSOA geography file so we can see what is missing use "`urbpath'/Eng_Wales/lsoa/RUURB_2005_MAR_LSOA_EW.dta", clear merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_ew_lsoacode.dta", gen(m_lsoa_postcode) * English IMD 2010 * non-matches will be Wales merge 1:1 zonecode using "`eimdpath'/ID-2010-indices-domains.dta", gen(m_lsoa_eimd) * English address & postcode counts merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_uk_lsoacode_addr_counts.dta", gen(m_address_counts) * now DECC data - will be non-matches due to aggregation/non-disclosure merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2008-all.dta", gen(m_elec_2008) merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2009-all.dta", gen(m_elec_2009) merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2010-all.dta", gen(m_elec_2010) * first line is junk for some reason li in 1/2 drop in 1 * test merges foreach m of varlist m_* { tab `m' } su *_2008 *_2009 *_2010 local testvars "OrdConsumption Econ7consumption AvgOrdConsumption AvgEcon7consumption" local years "2008 2009 2010" local diffyears "2008 2009" foreach v of local testvars { * 2009 - 2008 gen `v'_09_08_dif = `v'_2009 - `v'_2008 lab var `v'_09_08_dif "Change in `v' (2009 - 2008)" gen `v'_09_08_difpc = 100*(`v'_09_08_dif/`v'_2008) lab var `v'_09_08_difpc "Change in `v' (2009 - 2008) as % of 2008" * 2010 - 2009 gen `v'_10_09_dif = `v'_2010 - `v'_2009 lab var `v'_10_09_dif "Change in `v' (2010 - 2009)" gen `v'_10_09_difpc = 100*(`v'_10_09_dif/`v'_2009) lab var `v'_10_09_difpc "Change in `v' (2010 - 2009) as % of 2009" * 2010 - 2008 gen `v'_10_08_dif = `v'_2010 - `v'_2008 lab var `v'_10_08_dif "Change in `v' (2010 - 2008)" gen `v'_10_08_difpc = 100*(`v'_10_08_dif/`v'_2008) lab var `v'_10_08_difpc "Change in `v' (2010 - 2008) as % of 2008" foreach y of local years { di "****************" di "* Testing `v' for `y'" di "*" gen z`v'_`y' = 0 lab var z`v'_`y' "Value is less than zero (`v', `y')" replace z`v'_`y' = 1 if `v'_`y' < 0 * How many LSOAs have negative values? table regionname morphologyname z`v'_`y' * which LSOAs are they? li regionname districtname lowersoacode NumOrdMeters_`y' NumEcon7Meters_`y' z`v'_`y' `v'_`y' if z`v'_`y' == 1, sep(0) noobs di "*" di "* End testing `v' for `y'" di "****************" } di "****************" } if `do_matrix_graphs' { * test year change graph matrix OrdConsumption_*, msize(tiny) name(OrdConsumption) half scale(0.75) graph export "`rpath'/matrix-OrdConsumption.png", replace graph matrix Econ7consumption_*, msize(tiny) name(Econ7consumption) half scale(0.75) graph export "`rpath'/matrix-Econ7consumption.png", replace graph matrix NumOrdMeters_*, msize(tiny) name(NumOrdMeters) half scale(0.75) graph export "`rpath'/matrix-NumOrdMeters.png", replace graph matrix NumEcon7Meters_*, msize(tiny) name(NumEcon7Meters) half scale(0.75) graph export "`rpath'/matrix-NumEcon7Meters.png", replace graph matrix AvgOrdConsumption_*, msize(tiny) name(AvgOrdConsumption) half scale(0.75) graph export "`rpath'/matrix-AvgOrdConsumption.png", replace graph matrix AvgEcon7consumption_*, msize(tiny) name(AvgEcon7consumption) half scale(0.75) graph export "`rpath'/matrix-AvgEcon7consumption.png", replace * compare with address counts graph matrix NumOrdMeters_* g_uk_address_count g_uk_deliverypoint_count, msize(tiny) name(address_counts) half scale(0.75) graph export "`rpath'/matrix-ord-meter-address-counts.png", replace } if `do_bar_graphs' { graph hbar *_difpc, by(morphologyname) name(hbar_morph) graph export "`rpath'/hbar-difpc-morphologyname.png", replace graph hbar *_difpc, by(imd2010_dec) name(hbar_imd2010_dec) graph export "`rpath'/hbar-difdif-c-imd2010_dec.png", replace } drop v* su z* su *dif* local vars = "NumOrdMeters NumEcon7Meters" local years "2008 2009 2010" foreach v of local vars { foreach y of local years { gen `v'_`y'_addr_ppn = `v'_`y'/g_uk_address_count gen `v'_`y'_delptc_ppn = `v'_`y'/g_uk_deliverypoint_count } } local vars = "OrdConsumption_ Econ7consumption_ AvgOrdConsumption_ AvgEcon7consumption_ NumOrdMeters_ NumEcon7Meters_" foreach dv of local dist_testvars { di "* Testing by `dv'" foreach v of local vars { di "* -> Testing `v'* " tabstat `v'*, by(`dv') } di "* -> Testing NumMeters_* " tabstat *Meters_*_addr_ppn , by(`dv') tabstat *Meters_*_delptc_ppn , by(`dv') } gen lsoacode_2001 = ew_lsoacode preserve keep zonecode lsoacode_2001 regioncode regionname countycode countyname districtcode districtname *200* *dif* outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo.csv", comma replace outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo-southwest.csv" if regionname == "South West", comma replace export excel using "`dpath'/processed/llsoa-electricity-all-years-England-geo.xls", sheet("data") firstrow(variables) replace restore save "`dpath'/processed/llsoa-electricity-all-years-England-geo.dta", replace log close