diff --git a/NEED/analyse-NEED-EULF-2014-electricity-consumption.do b/NEED/analyse-NEED-EULF-2014-electricity-consumption.do
index a4f945de8a014da4dfec65329b839adf11ed1501..96635e0739f904cf71db24a904a15dd81fc8a74a 100644
--- a/NEED/analyse-NEED-EULF-2014-electricity-consumption.do
+++ b/NEED/analyse-NEED-EULF-2014-electricity-consumption.do
@@ -35,11 +35,8 @@ GNU General Public License for more details.
 
 
 * Requires: estout
-
 clear all
 
-capture noisily log close
-
 set more off
 
 * written for Mac OSX - remember to change filesystem delimiter for other platforms
@@ -47,11 +44,16 @@ global home "~"
 
 global dpath "$home/Documents/Work/Data/Social Science Datasets/DECC/NEED/End User Licence File 2014/processed"
 
-global rpath "$home/Dropbox/RSS-2015/results"
+*global rpath "$home/Dropbox/RSS-2015/NEED/results"
+global rpath "$home/Documents/Work/Papers and Conferences/RSS-2015/NEED/results"
 
-local version "v1"
+local version "v1" // version management via github
 
-log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace
+capture noisily log close _all
+
+* start main log
+* each subsection has own log
+log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_main.smcl", replace name(main)
 
 * set sample
 * 100 = 100pc
@@ -69,7 +71,8 @@ local need_weight = "[pw = WEIGHT]" // use probability weights
 local do_desc = 0 // do descriptives
 local do_histo = 0 // toggle graph drawing
 local do_box = 0 // toggle graph drawing
-local do_xsec = 1 // run cross-sectional analysis
+local do_excl = 0 // tests for correlates with exclusion
+local do_xsec = 0 // run cross-sectional analysis
 local do_longit = 1 // run longitudinal analysis
 
 
@@ -111,13 +114,19 @@ lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,0
 * set up correct long form 'is X present in year' using year (if known)
 local vars "BOILER LI CWI"
 
-* what will hapen if there are multiple replacements in a household
+* what will hapen if there are multiple replacements in a household?
 foreach v of local vars {
 	gen ba_have_`v' = 0
 	destring `v'_YEAR, force replace
 	replace ba_have_`v' = 1 if `v'_YEAR <= year
 }
 
+* make sure test vars are destringed
+local tvars "IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER"
+foreach tv of local tvars {
+	destring `tv', force replace
+}
+
 * labels
 lab var Econs "Electricity (kWh/year)"
 lab var Gcons "Gas (kWh/year)"
@@ -140,6 +149,13 @@ foreach v of local setupvars {
 	lab val `v'Validr `v'Validr
 	tab `v'Validr `v'Valid
 
+	
+	gen `v'_excl_hi = 0 if `v'Validr == 1 // valid
+	replace `v'_excl_hi = 1 if `v'Validr == 4 // high
+	gen `v'_excl_lo = 0 if `v'Validr == 1 // valid
+	replace `v'_excl_lo = 1 if `v'Validr == 3 // low
+	
+
 	* set up consumption deciles and outlier flags
 	gen u99_`v' = 0
 	gen l99_`v' = 0
@@ -149,7 +165,7 @@ foreach v of local setupvars {
 	levelsof(year), local(levels)
 	foreach l of local levels {
 		di "* Calculating consumption deciles and outlier flags for `v' for `l'"
-		
+				
 		* creates missing for other years have to do this as egen does not allow by & can't 'replace'
 		di "* `v' deciles for `l'"
 		egen `v'_dec_`l' = cut(`v') if year == `l', group(10)
@@ -173,6 +189,7 @@ foreach v of local setupvars {
 		*tab m90_`v' if year == `l', mi
 		
 	}
+	
 	* now combine the deciles - set missing option otherwise it counts a row where all are missing as 0
 	egen `v'_dec = rowtotal(`v'_dec_*), missing
 	* remove temporary ones
@@ -190,9 +207,6 @@ foreach v of local setupvars {
 	lab var log`v' "Log `v'"
 }
 
-* set 'survey' weight
-svyset `need_weight'
-
 * flag dwellings which are off gas for electricity
 * NB - in this dataset we don't know if they use electricity as main heat (could be oil)
 gen ba_off_gas = 0
@@ -200,19 +214,25 @@ replace ba_off_gas = 1 if  GconsValidr == 2
 lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)"
 lab val ba_off_gas ba_off_gas
 
-* check
-svy: mean Gcons Econs, over(ba_off_gas)
-di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
+* add Gcons to loop over gas
+local descvars "Econs"
+if `do_desc' {
+	log off main
+	log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'-do_desc.smcl", replace name(do_desc)
+	
+	* set 'survey' weight
+	svyset `need_weight'
+	* check
+	svy: mean Gcons Econs, over(ba_off_gas)
+	di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
 
-table ba_off_gas MAIN_HEAT_FUEL `need_weight', missing // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
+	table ba_off_gas MAIN_HEAT_FUEL `need_weight', missing // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
 
-table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas)
-* roughly constant rate throughout years
-table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) c(mean Gcons n Gcons)
-* but off gas have no gas readings as you'd expect (DECC applied filter)
+	table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas)
+	* roughly constant rate throughout years
+	table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) c(mean Gcons n Gcons)
+	* but off gas have no gas readings as you'd expect (DECC applied filter)
 
-local descvars "Econs"
-if `do_desc' {
 	foreach v of local descvars {
 		di "***************"
 		di "* Testing `v' for `sample'% sample"
@@ -268,26 +288,72 @@ if `do_desc' {
 			graph export "$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_yr_ee_valid.png", replace
 
 		}
-		
-		di "* check the distributions of the outliers"
-		local tvars "MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG"
-		foreach tv of local tvars {
-			di "* Checking top 1% against `tv'"
-			tab `tv' u99_Econs , col
-		}  
-		
+	}
+	log close do_desc
+	log on main
+}
 
+if `do_excl' {
+	log off main
+	log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_excl.smcl", replace name(do_excl)
+	di "* running exclusion analysis"
+	di "* check the distributions of the outliers"
+	di "* % excluded as too high"
+	tab year Econs_excl_hi
+	di "* % excluded as too low"
+	tab year Econs_excl_lo
+	di "* exclusion models"
+
+	local tvars "MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG"
+	foreach tv of local tvars {
+		di "* Checking Econs lowest 1% (+L) against `tv'"
+		tab `tv' u99_Econs , col
+		di "* Checking Econs highest 1% (+G) against `tv'"
+		tab `tv' u99_Econs , col
+
+		di "* Checking Econs L against `tv'"
+		tab `tv' Econs_excl_lo , col
+		di "* Checking Econs G against `tv'"
+		tab `tv' Econs_excl_hi , col
+	}  
+
+	levelsof(year), local(years)
+	foreach y of local years {
+		di "* testing exclusions for `y'"
+		* use capture to avoid models failing where no exclusions (2010 ->)
+		capture noisily {
+			di "* Hi"
+			qui: logit Econs_excl_hi i.E7Flag2012 i.MAIN_HEAT_FUEL i.PROP_AGE ///
+				i.PROP_TYPE i.FLOOR_AREA_BAND i.EE_BAND ba_off_gas ///
+				if year == `y'
+			estat gof
+			est store Econs_excl_hi_`y'
+		}
 
+		capture noisily {
+			di "* Lo"
+			qui: logit Econs_excl_lo i.E7Flag2012 i.MAIN_HEAT_FUEL i.PROP_AGE ///
+				i.PROP_TYPE i.FLOOR_AREA_BAND i.EE_BAND ba_off_gas ///
+				if year == `y'
+			estat gof
+			est store Econs_excl_lo_`y'
+		}
 	}
+	estout Econs_excl_hi* using "$rpath/logit_Econs_excl_hi_`version'.txt", ///
+		cells("b se ci_u ci_l _star") ///
+		stats(r2_p ll N) replace
+	estout Econs_excl_lo* using "$rpath/logit_Econs_excl_lo_`version'.txt", ///
+		cells("b se ci_u ci_l _star") ///
+		stats(r2_p ll N) replace
+	
+	log close do_excl
+	log on main
 }
 
 if `do_xsec' {
-	di "* Running cross sectional analysis using 2012 (all valid cases)"
-	* make sure test vars are destringed
-	local tvars "IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER"
-	foreach tv of local tvars {
-		destring `tv', force replace
-	}
+	log off main
+	log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_xsec.smcl", replace name(do_xsec)
+	di "* Running cross sectional analysis for electricity using 2012 (all valid cases)"
 	* kitchen sink model - use 99% signif level
 	* use vce(robust) as running without & using hettest suggests heteroscedasticity
 	* regress would throw out collinear variables
@@ -309,11 +375,11 @@ if `do_xsec' {
 	* Plot residuals
 	* pnormal plot of residuals
 	pnorm logEconsr, name(pnorm_logEconsr)
-	graph export "$rpath/graphs/pnorm_logEconsr.png", replace
+	graph export "$rpath/graphs/pnorm_logEconsr_2012.png", replace
 	
 	* qnormal plot
 	qnorm logEconsr, name(qnorm_logEconsr)
-	graph export "$rpath/graphs/qnorm_logEconsr.png", replace
+	graph export "$rpath/graphs/qnorm_logEconsr_2012.png", replace
 
 	* test the null hypothesis that the variance of the residuals is homogenous. 
 	* Therefore, if the p-value is very small, we would have to reject the hypothesis 
@@ -348,10 +414,21 @@ if `do_xsec' {
 	
 	* test BIC etc
 	estat ic
+	
+	* preduce margins plot for floor area
+	margins i.FLOOR_AREA_BAND
+	marginsplot
+
+	graph box Econs if year == 2012, over(FLOOR_AREA_BAND)
+	graph export "$rpath/graphs/box_Econs_floor_area_2012.png", replace
 
+	log close do_xsec
+	log on main
 }
 
 if `do_longit' {
+	log off main
+	log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_longit.smcl", replace name(do_longit)
 	di "* Running longitudinal analysis"
 	di "* Check boiler transitions"
 	xttrans ba_have_BOILER, freq
@@ -415,8 +492,10 @@ if `do_longit' {
 		est store xtr_re_logEcons
 		estout xtr_re_logEcons using "$rpath/xtr_re_logEcons_`version'.txt",  cells("b se ci_u ci_l _star ") stats(r2_w r2_b r2_o rmse  N sigma_u sigma_e, fmt(%9.3f %9.0g)) replace
 	}
+	log close do_longit
+	log on main
 }
 
 di "* Done!"
 
-log close
+log close main