diff --git a/extract-DECC-LSOA-energy-data-from-excel.do b/extract-DECC-LSOA-energy-data-from-excel.do index df878135995a85accb4cd8cd10cc61390becd4ee..31a3aceba018a5044f47da312f298bd1c80ebcfb 100644 --- a/extract-DECC-LSOA-energy-data-from-excel.do +++ b/extract-DECC-LSOA-energy-data-from-excel.do @@ -1,320 +1,329 @@ -* Extract data from DECC extracts - -clear all - -capture log close - -set more off - +* Extract data from DECC LSOA level domestic energy consumption data: +* https://www.gov.uk/government/collections/mlsoa-and-llsoa-electricity-and-gas-estimates + +* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014 +* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518 +* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton +* b.anderson@soton.ac.uk +* (c) University of Southampton +* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies +* http://creativecommons.org/licenses/by-nc/4.0/ + +clear all + +capture log close + +set more off + local where = "/Users/ben/Documents" local projroot = "`where'/Work/Data/Social Science Datatsets/DECC" local dpath = "`projroot'/LSOA Energy Data" local dofiles = "`projroot'/do_files" -local rpath = "`projroot'/LSOA Energy Data/processed" - -* census data location +local rpath = "`projroot'/LSOA Energy Data/processed" + +* census data location local cpath = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data" -local eimdpath = "`where'/Work/Data/Social Science Datatsets/Indices of Deprivation/English ID 2010" -local pcluts = "`where'/Work/Data/GIS data/UK Postcodes/pcluts_2007nov_processed" -local urbpath = "`where'/Work/Data/Social Science Datatsets/UK Urban Rural Classification" - -log using "`rpath'/extract-DECC-LSOA-energy-data-from-excel-$S_DATE.smcl", replace - -local years "2007 2008 2009 2010" -* set to 1 to get set-up etc to run -local do_2007 = 0 -local do_2008 = 0 -local do_2009 = 0 -local do_2010 = 0 -local do_matrix_graphs = 0 -local do_bar_graphs = 1 - - - - -* 2007 = experimental pilots - -local dist_testvars "morphologyname imd2010_dec" - -* 2008 -if `do_2008' { - * 2008 - data in GOR tab format!! - local y = "2008" - local gor1 "North West" - local gor2 "West Midlands" - local gor3 "East Midlands" - local gor4 "North East" - local gor5 "Yorkshire and Humber" - local gor6 "East of England" - local gor7 "Greater London" - local gor8 "South East" - local gor9 "South West" - local gor10 "Wales" - - foreach g of numlist 1/10 { - di "* Importing GOR `g'" - capture noisily import excel "`dpath'/`y'/790-llsoa-electricity-`y'-england-wales.xls", sheet("`gor`g''") cellrange(A13) allstring firstrow clear - save "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta", replace - } - - * start with first - use "`dpath'/`y'/790-llsoa-electricity-`y'-gor1.dta", clear - - * append the others - foreach g of numlist 2/10 { - append using "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta" - } - - destring Ordinarydomesticconsumption Economy7consumption Numberofordinarydomesticmete /// - Numberofeconomy7meters Averageordinarydomesticconsum Averageeconomy7consumption, replace float force - - - rename LLSOAcode lsoacode_`y' - gen zonecode = lsoacode_`y' - - * duplicates? Caused by disclosure control? - duplicates report zonecode - duplicates tag zonecode, gen(dups_`y') - gen gor_`y' = GOR - lab var gor_`y' "Government Office Region (`y' data)" - *lab def gor_`y' 1 "North East" 2 "North West" 3 "Yorkshire and Humber" 4 "East Midlands" 5 "West Midlands" 6 "East of England" 7 "Greater London" 8 "South East" 9 "South West" 10 "Wales" - *lab val gor_`y' gor_`y' - tab gor_`y' dups_`y', mi - duplicates drop zonecode, force - - rename Ordinarydomesticconsumption OrdConsumption_`y' - rename Economy7consumption Econ7consumption_`y' - rename Numberofordinarydomesticmete NumOrdMeters_`y' - rename Numberofeconomy7meters NumEcon7Meters_`y' - rename Averageordinarydomesticconsum AvgOrdConsumption_`y' - rename Averageeconomy7consumption AvgEcon7consumption_`y' - - lab var OrdConsumption_`y' "Ordinary domestic consumption `y'" - lab var Econ7consumption_`y' "Economy 7 consumption `y'" - lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'" - lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'" - lab var AvgOrdConsumption_`y' "Average domestic consumption `y'" - lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'" - - - save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace - di "* Done `y'" - di "*****************************" -} - -if `do_2009' { - * 2009 - data in .csv files - local y = "2009" - insheet using "`dpath'/`y'/2347-llsoa-domestic-elec-raw.csv", clear - rename llsoa lsoacode_`y' - gen zonecode = lsoacode_`y' - rename consumption_dom OrdConsumption_`y' - lab var OrdConsumption_`y' "Ordinary domestic consumption `y'" - rename consumption_e7 Econ7consumption_`y' - lab var Econ7consumption_`y' "Economy 7 consumption `y'" - rename no_mpan_dom NumOrdMeters_`y' - lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'" - rename no_mpan_e7 NumEcon7Meters_`y' - lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'" - rename av_consumption_dom AvgOrdConsumption_`y' - lab var AvgOrdConsumption_`y' "Average domestic consumption `y'" - rename av_consumption_e7 AvgEcon7consumption_`y' - lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'" - * new for 2009 - rename no_dis_dom no_dis_dom_`y' - rename no_dis_e7 no_dis_e7_`y' - - * duplicates? Caused by disclosure control? - duplicates report zonecode - duplicates tag zonecode, gen(dups_`y') - - tab la_name dups_`y', mi - duplicates drop zonecode, force - - save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace - di "* Done `y'" - di "*****************************" - -} - -if `do_2010' { - * 2010 - data - local y = "2010" - - import excel "`dpath'/`y'/4813-llsoa-domestic-elec-est-`y'-fixed.xls", sheet("LLSOA Electricity Domestic") firstrow clear - - * insheet using "`dpath'/`y'/4813-llsoa-domestic-elec-est-2010.csv", clear comma - rename lsoa lsoacode_`y' - gen zonecode = lsoacode_`y' - rename Ordinarydomesticconsumption OrdConsumption_`y' - lab var OrdConsumption_`y' "Ordinary domestic consumption `y'" - rename Economy7consumption Econ7consumption_`y' - lab var Econ7consumption_`y' "Economy 7 consumption `y'" - rename Numberofordinarydomesticmeters NumOrdMeters_`y' - lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'" - rename Numberofeconomy7meters NumEcon7Meters_`y' - lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'" - rename Averageordinarydomesticconsumption AvgOrdConsumption_`y' - lab var AvgOrdConsumption_`y' "Average domestic consumption `y'" - rename Averageeconomy7consumption AvgEcon7consumption_`y' - lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'" - - * duplicates? Caused by disclosure control? - duplicates report zonecode - duplicates tag zonecode, gen(dups_`y') - - tab la_name dups_`y', mi - duplicates drop zonecode, force - - save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace - di "* Done `y'" - di "*****************************" - -} - -* merge the files starting with baseline LSOA geography file so we can see what is missing -use "`urbpath'/Eng_Wales/lsoa/RUURB_2005_MAR_LSOA_EW.dta", clear - -merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_ew_lsoacode.dta", gen(m_lsoa_postcode) - -* English IMD 2010 -* non-matches will be Wales -merge 1:1 zonecode using "`eimdpath'/ID-2010-indices-domains.dta", gen(m_lsoa_eimd) -* English address & postcode counts -merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_uk_lsoacode_addr_counts.dta", gen(m_address_counts) - -* now DECC data - will be non-matches due to aggregation/non-disclosure -merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2008-all.dta", gen(m_elec_2008) -merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2009-all.dta", gen(m_elec_2009) -merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2010-all.dta", gen(m_elec_2010) - -* first line is junk for some reason -li in 1/2 -drop in 1 - -* test merges -foreach m of varlist m_* { - tab `m' -} - -su *_2008 *_2009 *_2010 - -local testvars "OrdConsumption Econ7consumption AvgOrdConsumption AvgEcon7consumption" -local years "2008 2009 2010" -local diffyears "2008 2009" - -foreach v of local testvars { - * 2009 - 2008 - gen `v'_09_08_dif = `v'_2009 - `v'_2008 - lab var `v'_09_08_dif "Change in `v' (2009 - 2008)" - - gen `v'_09_08_difpc = 100*(`v'_09_08_dif/`v'_2008) - lab var `v'_09_08_difpc "Change in `v' (2009 - 2008) as % of 2008" - - * 2010 - 2009 - gen `v'_10_09_dif = `v'_2010 - `v'_2009 - lab var `v'_10_09_dif "Change in `v' (2010 - 2009)" - - gen `v'_10_09_difpc = 100*(`v'_10_09_dif/`v'_2009) - lab var `v'_10_09_difpc "Change in `v' (2010 - 2009) as % of 2009" - - * 2010 - 2008 - gen `v'_10_08_dif = `v'_2010 - `v'_2008 - lab var `v'_10_08_dif "Change in `v' (2010 - 2008)" - - gen `v'_10_08_difpc = 100*(`v'_10_08_dif/`v'_2008) - lab var `v'_10_08_difpc "Change in `v' (2010 - 2008) as % of 2008" - - foreach y of local years { - di "****************" - di "* Testing `v' for `y'" - di "*" - gen z`v'_`y' = 0 - lab var z`v'_`y' "Value is less than zero (`v', `y')" - replace z`v'_`y' = 1 if `v'_`y' < 0 - - * How many LSOAs have negative values? - table regionname morphologyname z`v'_`y' - * which LSOAs are they? - li regionname districtname lowersoacode NumOrdMeters_`y' NumEcon7Meters_`y' z`v'_`y' `v'_`y' if z`v'_`y' == 1, sep(0) noobs - di "*" - di "* End testing `v' for `y'" - di "****************" - } - di "****************" -} - - -if `do_matrix_graphs' { - * test year change - - graph matrix OrdConsumption_*, msize(tiny) name(OrdConsumption) half scale(0.75) - graph export "`rpath'/matrix-OrdConsumption.png", replace - graph matrix Econ7consumption_*, msize(tiny) name(Econ7consumption) half scale(0.75) - graph export "`rpath'/matrix-Econ7consumption.png", replace - - graph matrix NumOrdMeters_*, msize(tiny) name(NumOrdMeters) half scale(0.75) - graph export "`rpath'/matrix-NumOrdMeters.png", replace - graph matrix NumEcon7Meters_*, msize(tiny) name(NumEcon7Meters) half scale(0.75) - graph export "`rpath'/matrix-NumEcon7Meters.png", replace - - graph matrix AvgOrdConsumption_*, msize(tiny) name(AvgOrdConsumption) half scale(0.75) - graph export "`rpath'/matrix-AvgOrdConsumption.png", replace - graph matrix AvgEcon7consumption_*, msize(tiny) name(AvgEcon7consumption) half scale(0.75) - graph export "`rpath'/matrix-AvgEcon7consumption.png", replace - - * compare with address counts - graph matrix NumOrdMeters_* g_uk_address_count g_uk_deliverypoint_count, msize(tiny) name(address_counts) half scale(0.75) - graph export "`rpath'/matrix-ord-meter-address-counts.png", replace -} - -if `do_bar_graphs' { - graph hbar *_difpc, by(morphologyname) name(hbar_morph) - graph export "`rpath'/hbar-difpc-morphologyname.png", replace - graph hbar *_difpc, by(imd2010_dec) name(hbar_imd2010_dec) - graph export "`rpath'/hbar-difdif-c-imd2010_dec.png", replace - -} - -drop v* - -su z* - -su *dif* - -local vars = "NumOrdMeters NumEcon7Meters" -local years "2008 2009 2010" - -foreach v of local vars { - foreach y of local years { - gen `v'_`y'_addr_ppn = `v'_`y'/g_uk_address_count - gen `v'_`y'_delptc_ppn = `v'_`y'/g_uk_deliverypoint_count - } -} - -local vars = "OrdConsumption_ Econ7consumption_ AvgOrdConsumption_ AvgEcon7consumption_ NumOrdMeters_ NumEcon7Meters_" - -foreach dv of local dist_testvars { - di "* Testing by `dv'" - foreach v of local vars { - di "* -> Testing `v'* " - tabstat `v'*, by(`dv') - } - di "* -> Testing NumMeters_* " - tabstat *Meters_*_addr_ppn , by(`dv') - tabstat *Meters_*_delptc_ppn , by(`dv') -} - - -preserve - keep zonecode regioncode regionname countycode countyname districtcode districtname *200* *dif* - outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo.csv", comma replace - outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo-southwest.csv" if regionname == "South West", comma replace - export excel using "`dpath'/processed/llsoa-electricity-all-years-England-geo.xls", sheet("data") firstrow(variables) replace - -restore -save "`dpath'/processed/llsoa-electricity-all-years-England-geo.dta", replace - - -log close +local eimdpath = "`where'/Work/Data/Social Science Datatsets/Indices of Deprivation/English ID 2010" +local pcluts = "`where'/Work/Data/GIS data/UK Postcodes/pcluts_2007nov_processed" +local urbpath = "`where'/Work/Data/Social Science Datatsets/UK Urban Rural Classification" + +log using "`rpath'/extract-DECC-LSOA-energy-data-from-excel-$S_DATE.smcl", replace + +local years "2007 2008 2009 2010" +* set to 1 to get set-up etc to run +local do_2007 = 0 +local do_2008 = 0 +local do_2009 = 0 +local do_2010 = 0 +local do_matrix_graphs = 0 +local do_bar_graphs = 1 + + + + +* 2007 = experimental pilots + +local dist_testvars "morphologyname imd2010_dec" + +* 2008 +if `do_2008' { + * 2008 - data in GOR tab format!! + local y = "2008" + local gor1 "North West" + local gor2 "West Midlands" + local gor3 "East Midlands" + local gor4 "North East" + local gor5 "Yorkshire and Humber" + local gor6 "East of England" + local gor7 "Greater London" + local gor8 "South East" + local gor9 "South West" + local gor10 "Wales" + + foreach g of numlist 1/10 { + di "* Importing GOR `g'" + capture noisily import excel "`dpath'/`y'/790-llsoa-electricity-`y'-england-wales.xls", sheet("`gor`g''") cellrange(A13) allstring firstrow clear + save "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta", replace + } + + * start with first + use "`dpath'/`y'/790-llsoa-electricity-`y'-gor1.dta", clear + + * append the others + foreach g of numlist 2/10 { + append using "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta" + } + + destring Ordinarydomesticconsumption Economy7consumption Numberofordinarydomesticmete /// + Numberofeconomy7meters Averageordinarydomesticconsum Averageeconomy7consumption, replace float force + + + rename LLSOAcode lsoacode_`y' + gen zonecode = lsoacode_`y' + + * duplicates? Caused by disclosure control? + duplicates report zonecode + duplicates tag zonecode, gen(dups_`y') + gen gor_`y' = GOR + lab var gor_`y' "Government Office Region (`y' data)" + *lab def gor_`y' 1 "North East" 2 "North West" 3 "Yorkshire and Humber" 4 "East Midlands" 5 "West Midlands" 6 "East of England" 7 "Greater London" 8 "South East" 9 "South West" 10 "Wales" + *lab val gor_`y' gor_`y' + tab gor_`y' dups_`y', mi + duplicates drop zonecode, force + + rename Ordinarydomesticconsumption OrdConsumption_`y' + rename Economy7consumption Econ7consumption_`y' + rename Numberofordinarydomesticmete NumOrdMeters_`y' + rename Numberofeconomy7meters NumEcon7Meters_`y' + rename Averageordinarydomesticconsum AvgOrdConsumption_`y' + rename Averageeconomy7consumption AvgEcon7consumption_`y' + + lab var OrdConsumption_`y' "Ordinary domestic consumption `y'" + lab var Econ7consumption_`y' "Economy 7 consumption `y'" + lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'" + lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'" + lab var AvgOrdConsumption_`y' "Average domestic consumption `y'" + lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'" + + + save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace + di "* Done `y'" + di "*****************************" +} + +if `do_2009' { + * 2009 - data in .csv files + local y = "2009" + insheet using "`dpath'/`y'/2347-llsoa-domestic-elec-raw.csv", clear + rename llsoa lsoacode_`y' + gen zonecode = lsoacode_`y' + rename consumption_dom OrdConsumption_`y' + lab var OrdConsumption_`y' "Ordinary domestic consumption `y'" + rename consumption_e7 Econ7consumption_`y' + lab var Econ7consumption_`y' "Economy 7 consumption `y'" + rename no_mpan_dom NumOrdMeters_`y' + lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'" + rename no_mpan_e7 NumEcon7Meters_`y' + lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'" + rename av_consumption_dom AvgOrdConsumption_`y' + lab var AvgOrdConsumption_`y' "Average domestic consumption `y'" + rename av_consumption_e7 AvgEcon7consumption_`y' + lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'" + * new for 2009 + rename no_dis_dom no_dis_dom_`y' + rename no_dis_e7 no_dis_e7_`y' + + * duplicates? Caused by disclosure control? + duplicates report zonecode + duplicates tag zonecode, gen(dups_`y') + + tab la_name dups_`y', mi + duplicates drop zonecode, force + + save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace + di "* Done `y'" + di "*****************************" + +} + +if `do_2010' { + * 2010 - data + local y = "2010" + + import excel "`dpath'/`y'/4813-llsoa-domestic-elec-est-`y'-fixed.xls", sheet("LLSOA Electricity Domestic") firstrow clear + + * insheet using "`dpath'/`y'/4813-llsoa-domestic-elec-est-2010.csv", clear comma + rename lsoa lsoacode_`y' + gen zonecode = lsoacode_`y' + rename Ordinarydomesticconsumption OrdConsumption_`y' + lab var OrdConsumption_`y' "Ordinary domestic consumption `y'" + rename Economy7consumption Econ7consumption_`y' + lab var Econ7consumption_`y' "Economy 7 consumption `y'" + rename Numberofordinarydomesticmeters NumOrdMeters_`y' + lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'" + rename Numberofeconomy7meters NumEcon7Meters_`y' + lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'" + rename Averageordinarydomesticconsumption AvgOrdConsumption_`y' + lab var AvgOrdConsumption_`y' "Average domestic consumption `y'" + rename Averageeconomy7consumption AvgEcon7consumption_`y' + lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'" + + * duplicates? Caused by disclosure control? + duplicates report zonecode + duplicates tag zonecode, gen(dups_`y') + + tab la_name dups_`y', mi + duplicates drop zonecode, force + + save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace + di "* Done `y'" + di "*****************************" + +} + +* merge the files starting with baseline LSOA geography file so we can see what is missing +use "`urbpath'/Eng_Wales/lsoa/RUURB_2005_MAR_LSOA_EW.dta", clear + +merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_ew_lsoacode.dta", gen(m_lsoa_postcode) + +* English IMD 2010 +* non-matches will be Wales +merge 1:1 zonecode using "`eimdpath'/ID-2010-indices-domains.dta", gen(m_lsoa_eimd) +* English address & postcode counts +merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_uk_lsoacode_addr_counts.dta", gen(m_address_counts) + +* now DECC data - will be non-matches due to aggregation/non-disclosure +merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2008-all.dta", gen(m_elec_2008) +merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2009-all.dta", gen(m_elec_2009) +merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2010-all.dta", gen(m_elec_2010) + +* first line is junk for some reason +li in 1/2 +drop in 1 + +* test merges +foreach m of varlist m_* { + tab `m' +} + +su *_2008 *_2009 *_2010 + +local testvars "OrdConsumption Econ7consumption AvgOrdConsumption AvgEcon7consumption" +local years "2008 2009 2010" +local diffyears "2008 2009" + +foreach v of local testvars { + * 2009 - 2008 + gen `v'_09_08_dif = `v'_2009 - `v'_2008 + lab var `v'_09_08_dif "Change in `v' (2009 - 2008)" + + gen `v'_09_08_difpc = 100*(`v'_09_08_dif/`v'_2008) + lab var `v'_09_08_difpc "Change in `v' (2009 - 2008) as % of 2008" + + * 2010 - 2009 + gen `v'_10_09_dif = `v'_2010 - `v'_2009 + lab var `v'_10_09_dif "Change in `v' (2010 - 2009)" + + gen `v'_10_09_difpc = 100*(`v'_10_09_dif/`v'_2009) + lab var `v'_10_09_difpc "Change in `v' (2010 - 2009) as % of 2009" + + * 2010 - 2008 + gen `v'_10_08_dif = `v'_2010 - `v'_2008 + lab var `v'_10_08_dif "Change in `v' (2010 - 2008)" + + gen `v'_10_08_difpc = 100*(`v'_10_08_dif/`v'_2008) + lab var `v'_10_08_difpc "Change in `v' (2010 - 2008) as % of 2008" + + foreach y of local years { + di "****************" + di "* Testing `v' for `y'" + di "*" + gen z`v'_`y' = 0 + lab var z`v'_`y' "Value is less than zero (`v', `y')" + replace z`v'_`y' = 1 if `v'_`y' < 0 + + * How many LSOAs have negative values? + table regionname morphologyname z`v'_`y' + * which LSOAs are they? + li regionname districtname lowersoacode NumOrdMeters_`y' NumEcon7Meters_`y' z`v'_`y' `v'_`y' if z`v'_`y' == 1, sep(0) noobs + di "*" + di "* End testing `v' for `y'" + di "****************" + } + di "****************" +} + + +if `do_matrix_graphs' { + * test year change + + graph matrix OrdConsumption_*, msize(tiny) name(OrdConsumption) half scale(0.75) + graph export "`rpath'/matrix-OrdConsumption.png", replace + graph matrix Econ7consumption_*, msize(tiny) name(Econ7consumption) half scale(0.75) + graph export "`rpath'/matrix-Econ7consumption.png", replace + + graph matrix NumOrdMeters_*, msize(tiny) name(NumOrdMeters) half scale(0.75) + graph export "`rpath'/matrix-NumOrdMeters.png", replace + graph matrix NumEcon7Meters_*, msize(tiny) name(NumEcon7Meters) half scale(0.75) + graph export "`rpath'/matrix-NumEcon7Meters.png", replace + + graph matrix AvgOrdConsumption_*, msize(tiny) name(AvgOrdConsumption) half scale(0.75) + graph export "`rpath'/matrix-AvgOrdConsumption.png", replace + graph matrix AvgEcon7consumption_*, msize(tiny) name(AvgEcon7consumption) half scale(0.75) + graph export "`rpath'/matrix-AvgEcon7consumption.png", replace + + * compare with address counts + graph matrix NumOrdMeters_* g_uk_address_count g_uk_deliverypoint_count, msize(tiny) name(address_counts) half scale(0.75) + graph export "`rpath'/matrix-ord-meter-address-counts.png", replace +} + +if `do_bar_graphs' { + graph hbar *_difpc, by(morphologyname) name(hbar_morph) + graph export "`rpath'/hbar-difpc-morphologyname.png", replace + graph hbar *_difpc, by(imd2010_dec) name(hbar_imd2010_dec) + graph export "`rpath'/hbar-difdif-c-imd2010_dec.png", replace + +} + +drop v* + +su z* + +su *dif* + +local vars = "NumOrdMeters NumEcon7Meters" +local years "2008 2009 2010" + +foreach v of local vars { + foreach y of local years { + gen `v'_`y'_addr_ppn = `v'_`y'/g_uk_address_count + gen `v'_`y'_delptc_ppn = `v'_`y'/g_uk_deliverypoint_count + } +} + +local vars = "OrdConsumption_ Econ7consumption_ AvgOrdConsumption_ AvgEcon7consumption_ NumOrdMeters_ NumEcon7Meters_" + +foreach dv of local dist_testvars { + di "* Testing by `dv'" + foreach v of local vars { + di "* -> Testing `v'* " + tabstat `v'*, by(`dv') + } + di "* -> Testing NumMeters_* " + tabstat *Meters_*_addr_ppn , by(`dv') + tabstat *Meters_*_delptc_ppn , by(`dv') +} + + +preserve + keep zonecode regioncode regionname countycode countyname districtcode districtname *200* *dif* + outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo.csv", comma replace + outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo-southwest.csv" if regionname == "South West", comma replace + export excel using "`dpath'/processed/llsoa-electricity-all-years-England-geo.xls", sheet("data") firstrow(variables) replace + +restore +save "`dpath'/processed/llsoa-electricity-all-years-England-geo.dta", replace + + +log close