diff --git a/extract-DECC-LSOA-energy-data-from-excel.do b/extract-DECC-LSOA-energy-data-from-excel.do
index df878135995a85accb4cd8cd10cc61390becd4ee..31a3aceba018a5044f47da312f298bd1c80ebcfb 100644
--- a/extract-DECC-LSOA-energy-data-from-excel.do
+++ b/extract-DECC-LSOA-energy-data-from-excel.do
@@ -1,320 +1,329 @@
-* Extract data from DECC extracts
-
-clear all
-
-capture log close
-
-set more off
-
+* Extract data from DECC LSOA level domestic energy consumption data:
+* https://www.gov.uk/government/collections/mlsoa-and-llsoa-electricity-and-gas-estimates
+
+* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
+* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518
+* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
+* b.anderson@soton.ac.uk
+* (c) University of Southampton
+* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
+* http://creativecommons.org/licenses/by-nc/4.0/
+
+clear all
+* close any log left open by an earlier run (capture: no error if none is open)
+capture log close
+* do not pause the output waiting for a keypress
+set more off
+
 local where = "/Users/ben/Documents"
 local projroot = "`where'/Work/Data/Social Science Datatsets/DECC"
 
 local dpath = "`projroot'/LSOA Energy Data"
 local dofiles = "`projroot'/do_files"
-local rpath = "`projroot'/LSOA Energy Data/processed"
-
-* census data location
+local rpath = "`projroot'/LSOA Energy Data/processed"
+
+* census data location
 local cpath = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data"
-local eimdpath = "`where'/Work/Data/Social Science Datatsets/Indices of Deprivation/English ID 2010"
-local pcluts = "`where'/Work/Data/GIS data/UK Postcodes/pcluts_2007nov_processed"
-local urbpath = "`where'/Work/Data/Social Science Datatsets/UK Urban Rural Classification"
-
-log using "`rpath'/extract-DECC-LSOA-energy-data-from-excel-$S_DATE.smcl", replace
-
-local years "2007 2008 2009 2010"
-* set to 1 to get set-up etc to run
-local do_2007 = 0
-local do_2008 = 0
-local do_2009 = 0
-local do_2010 = 0
-local do_matrix_graphs = 0
-local do_bar_graphs = 1
-
-
-
-
-* 2007 = experimental pilots
-
-local dist_testvars "morphologyname imd2010_dec"
-
-* 2008
-if `do_2008' {
-	* 2008 - data in GOR tab format!!
-	local y = "2008"
-	local gor1 "North West"
-	local gor2 "West Midlands"
-	local gor3 "East Midlands"
-	local gor4 "North East"
-	local gor5 "Yorkshire and Humber"
-	local gor6 "East of England"
-	local gor7 "Greater London"
-	local gor8 "South East"
-	local gor9 "South West"
-	local gor10 "Wales"
-	
-	foreach g of numlist 1/10 {
-		di "* Importing GOR `g'"
-		capture noisily import excel "`dpath'/`y'/790-llsoa-electricity-`y'-england-wales.xls", sheet("`gor`g''") cellrange(A13) allstring firstrow clear
-		save "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta", replace
-	}
-	
-	* start with first
-	use "`dpath'/`y'/790-llsoa-electricity-`y'-gor1.dta", clear
-	
-	* append the others
-	foreach g of numlist 2/10 {
-		append using "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta"
-	}
-	
-	destring Ordinarydomesticconsumption Economy7consumption Numberofordinarydomesticmete ///
-		Numberofeconomy7meters Averageordinarydomesticconsum Averageeconomy7consumption, replace float force
-	
-	
-	rename LLSOAcode lsoacode_`y'
-	gen zonecode = lsoacode_`y'
-	
-	* duplicates? Caused by disclosure control?
-	duplicates report zonecode
-	duplicates tag zonecode, gen(dups_`y')
-	gen gor_`y' = GOR
-	lab var gor_`y' "Government Office Region (`y' data)"
-	*lab def gor_`y' 1 "North East" 2 "North West" 3 "Yorkshire and Humber" 4 "East Midlands" 5 "West Midlands" 6 "East of England" 7 "Greater London" 8 "South East" 9 "South West" 10 "Wales"
-	*lab val gor_`y' gor_`y'
-	tab gor_`y' dups_`y', mi
-	duplicates drop zonecode, force
-	
-	rename Ordinarydomesticconsumption OrdConsumption_`y'
-	rename Economy7consumption Econ7consumption_`y'
-	rename Numberofordinarydomesticmete NumOrdMeters_`y'
-	rename Numberofeconomy7meters NumEcon7Meters_`y'
-	rename Averageordinarydomesticconsum AvgOrdConsumption_`y'
-	rename Averageeconomy7consumption AvgEcon7consumption_`y'
-	
-	lab var OrdConsumption_`y' "Ordinary domestic consumption `y'"
-	lab var Econ7consumption_`y' "Economy 7 consumption `y'"
-	lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'"
-	lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'"
-	lab var AvgOrdConsumption_`y' "Average domestic consumption `y'"
-	lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'"
-
-
-	save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace
-	di "* Done `y'"
-	di "*****************************"
-}
-
-if `do_2009' {
-	* 2009 - data in .csv files
-	local y = "2009"
-	insheet using "`dpath'/`y'/2347-llsoa-domestic-elec-raw.csv", clear
-	rename llsoa lsoacode_`y'
-	gen zonecode = lsoacode_`y'
-	rename consumption_dom OrdConsumption_`y'
-	lab var OrdConsumption_`y' "Ordinary domestic consumption `y'"
-	rename consumption_e7 Econ7consumption_`y'
-	lab var Econ7consumption_`y' "Economy 7 consumption `y'"
-	rename no_mpan_dom NumOrdMeters_`y'
-	lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'"
-	rename no_mpan_e7 NumEcon7Meters_`y'
-	lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'"
-	rename av_consumption_dom AvgOrdConsumption_`y'
-	lab var AvgOrdConsumption_`y' "Average domestic consumption `y'"
-	rename av_consumption_e7 AvgEcon7consumption_`y'
-	lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'"
-	* new for 2009
-	rename no_dis_dom no_dis_dom_`y'
-	rename no_dis_e7 no_dis_e7_`y'
-	
-	* duplicates? Caused by disclosure control?
-	duplicates report zonecode
-	duplicates tag zonecode, gen(dups_`y')
-
-	tab la_name dups_`y', mi
-	duplicates drop zonecode, force
-
-	save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace
-	di "* Done `y'"
-	di "*****************************"
- 
-}
-
-if `do_2010' {
-	* 2010 - data 
-	local y = "2010"
-	
-	import excel "`dpath'/`y'/4813-llsoa-domestic-elec-est-`y'-fixed.xls", sheet("LLSOA Electricity Domestic") firstrow clear
-
-	* insheet using "`dpath'/`y'/4813-llsoa-domestic-elec-est-2010.csv", clear comma
-	rename lsoa lsoacode_`y'
-	gen zonecode = lsoacode_`y'
-	rename  Ordinarydomesticconsumption OrdConsumption_`y'
-	lab var OrdConsumption_`y' "Ordinary domestic consumption `y'"
-	rename  Economy7consumption Econ7consumption_`y'
-	lab var Econ7consumption_`y' "Economy 7 consumption `y'"
-	rename  Numberofordinarydomesticmeters NumOrdMeters_`y'
-	lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'"
-	rename  Numberofeconomy7meters NumEcon7Meters_`y'
-	lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'"
-	rename  Averageordinarydomesticconsumption AvgOrdConsumption_`y'
-	lab var AvgOrdConsumption_`y' "Average domestic consumption `y'"
-	rename  Averageeconomy7consumption AvgEcon7consumption_`y'
-	lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'"
-	
-	* duplicates? Caused by disclosure control?
-	duplicates report zonecode
-	duplicates tag zonecode, gen(dups_`y')
-
-	tab la_name dups_`y', mi
-	duplicates drop zonecode, force
-
-	save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace
-	di "* Done `y'"
-	di "*****************************"
-
-}
-
-* merge the files starting with baseline LSOA geography file so we can see what is missing
-use "`urbpath'/Eng_Wales/lsoa/RUURB_2005_MAR_LSOA_EW.dta", clear
-
-merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_ew_lsoacode.dta", gen(m_lsoa_postcode)
-
-* English IMD 2010
-* non-matches will be Wales
-merge 1:1 zonecode using "`eimdpath'/ID-2010-indices-domains.dta", gen(m_lsoa_eimd)
-* English address & postcode counts
-merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_uk_lsoacode_addr_counts.dta", gen(m_address_counts)
-
-* now DECC data - will be non-matches due to aggregation/non-disclosure
-merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2008-all.dta", gen(m_elec_2008)
-merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2009-all.dta", gen(m_elec_2009)
-merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2010-all.dta", gen(m_elec_2010)
-
-* first line is junk for some reason
-li in 1/2
-drop in 1
-	
-* test merges
-foreach m of varlist m_* {
-	tab `m'
-}
-
-su *_2008 *_2009 *_2010
-
-local testvars "OrdConsumption Econ7consumption AvgOrdConsumption AvgEcon7consumption"
-local years "2008 2009 2010"
-local diffyears "2008 2009"
-
-foreach v of local testvars {
-	* 2009 - 2008
-	gen `v'_09_08_dif = `v'_2009 - `v'_2008
-	lab var `v'_09_08_dif "Change in `v' (2009 - 2008)"
-	
-	gen `v'_09_08_difpc = 100*(`v'_09_08_dif/`v'_2008)
-	lab var `v'_09_08_difpc "Change in `v' (2009 - 2008) as % of 2008"
-	
-	* 2010 - 2009
-	gen `v'_10_09_dif = `v'_2010 - `v'_2009
-	lab var `v'_10_09_dif "Change in `v' (2010 - 2009)"
-
-	gen `v'_10_09_difpc = 100*(`v'_10_09_dif/`v'_2009)
-	lab var `v'_10_09_difpc "Change in `v' (2010 - 2009) as % of 2009"
-
-	* 2010 - 2008
-	gen `v'_10_08_dif = `v'_2010 - `v'_2008
-	lab var `v'_10_08_dif "Change in `v' (2010 - 2008)"
-
-	gen `v'_10_08_difpc = 100*(`v'_10_08_dif/`v'_2008)
-	lab var `v'_10_08_difpc "Change in `v' (2010 - 2008) as % of 2008"
-
-	foreach y of local years {
-		di "****************"
-		di "* Testing `v' for `y'"
-		di "*"
-		gen z`v'_`y' = 0
-		lab var z`v'_`y' "Value is less than zero (`v', `y')"
-		replace z`v'_`y' = 1 if `v'_`y' < 0
-		
-		* How many LSOAs have negative values?
-		table regionname morphologyname z`v'_`y' 
-		* which LSOAs are they?
-		li regionname districtname lowersoacode NumOrdMeters_`y' NumEcon7Meters_`y' z`v'_`y' `v'_`y' if z`v'_`y' == 1, sep(0) noobs
-		di "*"
-		di "* End testing `v' for `y'"
-		di "****************"
-	}
-	di "****************"
-}
-
-
-if `do_matrix_graphs' {
-	* test year change
-
-	graph matrix OrdConsumption_*, msize(tiny) name(OrdConsumption) half scale(0.75)
-	graph export "`rpath'/matrix-OrdConsumption.png", replace
-	graph matrix Econ7consumption_*, msize(tiny) name(Econ7consumption) half scale(0.75)
-	graph export "`rpath'/matrix-Econ7consumption.png", replace
-	
-	graph matrix NumOrdMeters_*, msize(tiny) name(NumOrdMeters) half scale(0.75) 
-	graph export "`rpath'/matrix-NumOrdMeters.png", replace
-	graph matrix NumEcon7Meters_*, msize(tiny) name(NumEcon7Meters) half scale(0.75) 
-	graph export "`rpath'/matrix-NumEcon7Meters.png", replace
-	
-	graph matrix AvgOrdConsumption_*, msize(tiny) name(AvgOrdConsumption) half scale(0.75) 
-	graph export "`rpath'/matrix-AvgOrdConsumption.png", replace
-	graph matrix AvgEcon7consumption_*, msize(tiny) name(AvgEcon7consumption) half scale(0.75) 
-	graph export "`rpath'/matrix-AvgEcon7consumption.png", replace
-	
-	* compare with address counts
-	graph matrix NumOrdMeters_* g_uk_address_count g_uk_deliverypoint_count, msize(tiny) name(address_counts) half scale(0.75)
-	graph export "`rpath'/matrix-ord-meter-address-counts.png", replace
-}
-
-if `do_bar_graphs' {
-	graph hbar *_difpc, by(morphologyname) name(hbar_morph)
-	graph export "`rpath'/hbar-difpc-morphologyname.png", replace
-	graph hbar *_difpc, by(imd2010_dec) name(hbar_imd2010_dec)
-	graph export "`rpath'/hbar-difdif-c-imd2010_dec.png", replace
-
-}
-
-drop v*
-
-su z*
-
-su *dif*
-
-local vars = "NumOrdMeters NumEcon7Meters"
-local years "2008 2009 2010"
-
-foreach v of local vars {
-	foreach y of local years {
-		gen `v'_`y'_addr_ppn = `v'_`y'/g_uk_address_count
-		gen `v'_`y'_delptc_ppn = `v'_`y'/g_uk_deliverypoint_count
-	}
-}
-
-local vars = "OrdConsumption_ Econ7consumption_ AvgOrdConsumption_ AvgEcon7consumption_ NumOrdMeters_ NumEcon7Meters_"
-
-foreach dv of local dist_testvars {
-	di "* Testing by `dv'"
-	foreach v of local vars {
-		di "* -> Testing `v'* "
-		tabstat `v'*, by(`dv')
-	}
-	di "* -> Testing NumMeters_* "
-	tabstat *Meters_*_addr_ppn , by(`dv')
-	tabstat *Meters_*_delptc_ppn , by(`dv')
-}
-
-
-preserve
-	keep zonecode regioncode regionname countycode countyname districtcode districtname *200* *dif*
-	outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo.csv", comma replace
-	outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo-southwest.csv" if regionname == "South West", comma replace
-	export excel using "`dpath'/processed/llsoa-electricity-all-years-England-geo.xls", sheet("data") firstrow(variables) replace
-
-restore
-save "`dpath'/processed/llsoa-electricity-all-years-England-geo.dta", replace
-
-
-log close
+local eimdpath = "`where'/Work/Data/Social Science Datatsets/Indices of Deprivation/English ID 2010"
+local pcluts = "`where'/Work/Data/GIS data/UK Postcodes/pcluts_2007nov_processed"
+local urbpath = "`where'/Work/Data/Social Science Datatsets/UK Urban Rural Classification"
+* keep a dated log of this run in the processed results folder
+log using "`rpath'/extract-DECC-LSOA-energy-data-from-excel-$S_DATE.smcl", replace
+* NOTE(review): this list is re-defined as "2008 2009 2010" further down, before its first use
+local years "2007 2008 2009 2010"
+* switches: set a flag to 1 to run that section (each year section rebuilds that year's processed .dta file)
+local do_2007 = 0
+local do_2008 = 0
+local do_2009 = 0
+local do_2010 = 0
+local do_matrix_graphs = 0
+local do_bar_graphs = 1
+
+* NOTE(review): do_2007 is not referenced by any section visible in this script
+
+
+* 2007 = experimental pilots
+* grouping variables for the by-group summary tables near the end of the script
+local dist_testvars "morphologyname imd2010_dec"
+
+* 2008
+if `do_2008' {
+	* 2008 - the source workbook has one worksheet (tab) per Government Office Region (GOR)
+	local y = "2008"
+	local gor1 "North West"
+	local gor2 "West Midlands"
+	local gor3 "East Midlands"
+	local gor4 "North East"
+	local gor5 "Yorkshire and Humber"
+	local gor6 "East of England"
+	local gor7 "Greater London"
+	local gor8 "South East"
+	local gor9 "South West"
+	local gor10 "Wales"
+	* import each regional worksheet and save it as its own .dta file
+	foreach g of numlist 1/10 {
+		di "* Importing GOR `g'"
+		capture noisily import excel "`dpath'/`y'/790-llsoa-electricity-`y'-england-wales.xls", sheet("`gor`g''") cellrange(A13) allstring firstrow clear
+		save "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta", replace
+	}
+	* NOTE(review): capture noisily lets the loop continue after a failed import, in which case the save writes whatever is in memory at that point
+	* start with first
+	use "`dpath'/`y'/790-llsoa-electricity-`y'-gor1.dta", clear
+	
+	* append the others
+	foreach g of numlist 2/10 {
+		append using "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta"
+	}
+	* everything was imported as text (allstring); convert the measures to numeric, with non-numeric cells forced to missing
+	destring Ordinarydomesticconsumption Economy7consumption Numberofordinarydomesticmete ///
+		Numberofeconomy7meters Averageordinarydomesticconsum Averageeconomy7consumption, replace float force
+	
+	* zonecode is the key used by the merges later in this script
+	rename LLSOAcode lsoacode_`y'
+	gen zonecode = lsoacode_`y'
+	
+	* duplicates? Caused by disclosure control?
+	duplicates report zonecode
+	duplicates tag zonecode, gen(dups_`y')
+	gen gor_`y' = GOR
+	lab var gor_`y' "Government Office Region (`y' data)"
+	*lab def gor_`y' 1 "North East" 2 "North West" 3 "Yorkshire and Humber" 4 "East Midlands" 5 "West Midlands" 6 "East of England" 7 "Greater London" 8 "South East" 9 "South West" 10 "Wales"
+	*lab val gor_`y' gor_`y'
+	tab gor_`y' dups_`y', mi
+	duplicates drop zonecode, force
+	* shorter variable names, suffixed with the year
+	rename Ordinarydomesticconsumption OrdConsumption_`y'
+	rename Economy7consumption Econ7consumption_`y'
+	rename Numberofordinarydomesticmete NumOrdMeters_`y'
+	rename Numberofeconomy7meters NumEcon7Meters_`y'
+	rename Averageordinarydomesticconsum AvgOrdConsumption_`y'
+	rename Averageeconomy7consumption AvgEcon7consumption_`y'
+	
+	lab var OrdConsumption_`y' "Ordinary domestic consumption `y'"
+	lab var Econ7consumption_`y' "Economy 7 consumption `y'"
+	lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'"
+	lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'"
+	lab var AvgOrdConsumption_`y' "Average domestic consumption `y'"
+	lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'"
+
+
+	save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace
+	di "* Done `y'"
+	di "*****************************"
+}
+
+if `do_2009' {
+	* 2009 - data in .csv files
+	local y = "2009"
+	insheet using "`dpath'/`y'/2347-llsoa-domestic-elec-raw.csv", clear
+	rename llsoa lsoacode_`y'
+	gen zonecode = lsoacode_`y'
+	rename consumption_dom OrdConsumption_`y'
+	lab var OrdConsumption_`y' "Ordinary domestic consumption `y'"
+	rename consumption_e7 Econ7consumption_`y'
+	lab var Econ7consumption_`y' "Economy 7 consumption `y'"
+	rename no_mpan_dom NumOrdMeters_`y'
+	lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'"
+	rename no_mpan_e7 NumEcon7Meters_`y'
+	lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'"
+	rename av_consumption_dom AvgOrdConsumption_`y'
+	lab var AvgOrdConsumption_`y' "Average domestic consumption `y'"
+	rename av_consumption_e7 AvgEcon7consumption_`y'
+	lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'"
+	* new for 2009: keep the two no_dis_ columns, suffixed with the year
+	rename no_dis_dom no_dis_dom_`y'
+	rename no_dis_e7 no_dis_e7_`y'
+	
+	* duplicates? Caused by disclosure control?
+	duplicates report zonecode
+	duplicates tag zonecode, gen(dups_`y')
+	* tabulate the duplicate counts by la_name, then keep one row per zonecode
+	tab la_name dups_`y', mi
+	duplicates drop zonecode, force
+
+	save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace
+	di "* Done `y'"
+	di "*****************************"
+ 
+}
+
+if `do_2010' {
+	* 2010 - data read from a single Excel worksheet
+	local y = "2010"
+	
+	import excel "`dpath'/`y'/4813-llsoa-domestic-elec-est-`y'-fixed.xls", sheet("LLSOA Electricity Domestic") firstrow clear
+	* alternative csv import (disabled):
+	* insheet using "`dpath'/`y'/4813-llsoa-domestic-elec-est-2010.csv", clear comma
+	rename lsoa lsoacode_`y'
+	gen zonecode = lsoacode_`y'
+	rename  Ordinarydomesticconsumption OrdConsumption_`y'
+	lab var OrdConsumption_`y' "Ordinary domestic consumption `y'"
+	rename  Economy7consumption Econ7consumption_`y'
+	lab var Econ7consumption_`y' "Economy 7 consumption `y'"
+	rename  Numberofordinarydomesticmeters NumOrdMeters_`y'
+	lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'"
+	rename  Numberofeconomy7meters NumEcon7Meters_`y'
+	lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'"
+	rename  Averageordinarydomesticconsumption AvgOrdConsumption_`y'
+	lab var AvgOrdConsumption_`y' "Average domestic consumption `y'"
+	rename  Averageeconomy7consumption AvgEcon7consumption_`y'
+	lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'"
+	
+	* duplicates? Caused by disclosure control?
+	duplicates report zonecode
+	duplicates tag zonecode, gen(dups_`y')
+	* tabulate the duplicate counts by la_name, then keep one row per zonecode
+	tab la_name dups_`y', mi
+	duplicates drop zonecode, force
+
+	save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace
+	di "* Done `y'"
+	di "*****************************"
+
+}
+
+* merge the files starting with baseline LSOA geography file so we can see what is missing
+use "`urbpath'/Eng_Wales/lsoa/RUURB_2005_MAR_LSOA_EW.dta", clear
+* every merge below is 1:1 on zonecode; each gen() variable records the match status of that merge
+merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_ew_lsoacode.dta", gen(m_lsoa_postcode)
+
+* English IMD 2010
+* non-matches will be Wales
+merge 1:1 zonecode using "`eimdpath'/ID-2010-indices-domains.dta", gen(m_lsoa_eimd)
+* English address & postcode counts
+merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_uk_lsoacode_addr_counts.dta", gen(m_address_counts)
+
+* now DECC data - will be non-matches due to aggregation/non-disclosure
+merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2008-all.dta", gen(m_elec_2008)
+merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2009-all.dta", gen(m_elec_2009)
+merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2010-all.dta", gen(m_elec_2010)
+
+* first line is junk for some reason
+li in 1/2
+drop in 1
+* NOTE(review): this drops whichever observation is first in the current sort order - confirm it is still the junk row
+* test merges
+foreach m of varlist m_* {
+	tab `m'
+}
+* summary statistics for every year-suffixed variable
+su *_2008 *_2009 *_2010
+
+local testvars "OrdConsumption Econ7consumption AvgOrdConsumption AvgEcon7consumption"
+local years "2008 2009 2010"
+local diffyears "2008 2009"
+* for each measure: differences between years (absolute and as %), then a per-year check for negative values
+foreach v of local testvars {
+	* 2009 - 2008
+	gen `v'_09_08_dif = `v'_2009 - `v'_2008
+	lab var `v'_09_08_dif "Change in `v' (2009 - 2008)"
+	
+	gen `v'_09_08_difpc = 100*(`v'_09_08_dif/`v'_2008)
+	lab var `v'_09_08_difpc "Change in `v' (2009 - 2008) as % of 2008"
+	
+	* 2010 - 2009
+	gen `v'_10_09_dif = `v'_2010 - `v'_2009
+	lab var `v'_10_09_dif "Change in `v' (2010 - 2009)"
+
+	gen `v'_10_09_difpc = 100*(`v'_10_09_dif/`v'_2009)
+	lab var `v'_10_09_difpc "Change in `v' (2010 - 2009) as % of 2009"
+
+	* 2010 - 2008
+	gen `v'_10_08_dif = `v'_2010 - `v'_2008
+	lab var `v'_10_08_dif "Change in `v' (2010 - 2008)"
+
+	gen `v'_10_08_difpc = 100*(`v'_10_08_dif/`v'_2008)
+	lab var `v'_10_08_difpc "Change in `v' (2010 - 2008) as % of 2008"
+	* flag (z prefix) and list any negative values of this measure, year by year
+	foreach y of local years {
+		di "****************"
+		di "* Testing `v' for `y'"
+		di "*"
+		gen z`v'_`y' = 0
+		lab var z`v'_`y' "Value is less than zero (`v', `y')"
+		replace z`v'_`y' = 1 if `v'_`y' < 0
+		
+		* How many LSOAs have negative values?
+		table regionname morphologyname z`v'_`y' 
+		* which LSOAs are they?
+		li regionname districtname lowersoacode NumOrdMeters_`y' NumEcon7Meters_`y' z`v'_`y' `v'_`y' if z`v'_`y' == 1, sep(0) noobs
+		di "*"
+		di "* End testing `v' for `y'"
+		di "****************"
+	}
+	di "****************"
+}
+
+
+if `do_matrix_graphs' {
+	* scatterplot matrices to compare each measure across the years; each one is exported as .png
+
+	graph matrix OrdConsumption_*, msize(tiny) name(OrdConsumption) half scale(0.75)
+	graph export "`rpath'/matrix-OrdConsumption.png", replace
+	graph matrix Econ7consumption_*, msize(tiny) name(Econ7consumption) half scale(0.75)
+	graph export "`rpath'/matrix-Econ7consumption.png", replace
+	
+	graph matrix NumOrdMeters_*, msize(tiny) name(NumOrdMeters) half scale(0.75) 
+	graph export "`rpath'/matrix-NumOrdMeters.png", replace
+	graph matrix NumEcon7Meters_*, msize(tiny) name(NumEcon7Meters) half scale(0.75) 
+	graph export "`rpath'/matrix-NumEcon7Meters.png", replace
+	
+	graph matrix AvgOrdConsumption_*, msize(tiny) name(AvgOrdConsumption) half scale(0.75) 
+	graph export "`rpath'/matrix-AvgOrdConsumption.png", replace
+	graph matrix AvgEcon7consumption_*, msize(tiny) name(AvgEcon7consumption) half scale(0.75) 
+	graph export "`rpath'/matrix-AvgEcon7consumption.png", replace
+	
+	* compare with address counts
+	graph matrix NumOrdMeters_* g_uk_address_count g_uk_deliverypoint_count, msize(tiny) name(address_counts) half scale(0.75)
+	graph export "`rpath'/matrix-ord-meter-address-counts.png", replace
+}
+
+if `do_bar_graphs' {
+	* bar charts of every % change variable, one panel per morphologyname and per imd2010_dec group; exported with matching hbar-difpc- file names
+	graph hbar *_difpc, by(morphologyname) name(hbar_morph)
+	graph export "`rpath'/hbar-difpc-morphologyname.png", replace
+	graph hbar *_difpc, by(imd2010_dec) name(hbar_imd2010_dec)
+	graph export "`rpath'/hbar-difpc-imd2010_dec.png", replace
+}
+
+drop v*
+* NOTE(review): the drop above removes every variable whose name starts with v - confirm nothing needed is caught
+su z*
+
+su *dif*
+* meter counts as a proportion of the address count and of the delivery point count
+local vars = "NumOrdMeters NumEcon7Meters"
+local years "2008 2009 2010"
+
+foreach v of local vars {
+	foreach y of local years {
+		gen `v'_`y'_addr_ppn = `v'_`y'/g_uk_address_count
+		gen `v'_`y'_delptc_ppn = `v'_`y'/g_uk_deliverypoint_count
+	}
+}
+
+local vars = "OrdConsumption_ Econ7consumption_ AvgOrdConsumption_ AvgEcon7consumption_ NumOrdMeters_ NumEcon7Meters_"
+* summary tables (tabstat) of every measure, grouped by each variable listed in dist_testvars
+foreach dv of local dist_testvars {
+	di "* Testing by `dv'"
+	foreach v of local vars {
+		di "* -> Testing `v'* "
+		tabstat `v'*, by(`dv')
+	}
+	di "* -> Testing NumMeters_* "
+	tabstat *Meters_*_addr_ppn , by(`dv')
+	tabstat *Meters_*_delptc_ppn , by(`dv')
+}
+
+
+* export a reduced copy (geography ids, year-suffixed variables, differences) as csv and xls; the full dataset is saved as .dta after the restore
+preserve
+	* the 200 wildcard only matches 2008 and 2009 names, so _2010 is listed as well to carry the 2010 variables into the exports
+	keep zonecode regioncode regionname countycode countyname districtcode districtname *200* *_2010* *dif*
+	outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo.csv", comma replace
+	outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo-southwest.csv" if regionname == "South West", comma replace
+	export excel using "`dpath'/processed/llsoa-electricity-all-years-England-geo.xls", sheet("data") firstrow(variables) replace
+restore
+save "`dpath'/processed/llsoa-electricity-all-years-England-geo.dta", replace
+
+log close