diff --git a/NEED/analyse-NEED-EULF-2014.do b/NEED/analyse-NEED-EULF-2014.do
index c5b57101dfe8e977ee6f22d244715b3f39957b84..e403649b3e931368c2dccd177a356021c7d6571f 100644
--- a/NEED/analyse-NEED-EULF-2014.do
+++ b/NEED/analyse-NEED-EULF-2014.do
@@ -103,6 +103,10 @@ local pt106 "flat"
 * now loop over the energy types
 local vars "Gcons Econs Allcons"
 foreach v of local vars {
+	* check distributions of original consumption values by main heating fuel (name(, replace) lets the do-file be re-run in one session without a graph-already-exists error)
+	histogram `v'2012, by(MAIN_HEAT_FUEL, total) name(histo_`v', replace)
+	tabstat `v'2012, by(MAIN_HEAT_FUEL) s(n mean min max)
+	
 	* all hhs model
 	qui: regress log_`v'2012 `generic_hvars' ///
 		`generic_rvars' ///
@@ -113,7 +117,7 @@ foreach v of local vars {
 	estat ovtest
 	estat hettest
 	
-	di "* -> `v' linktest"
+	di "* -> `v' linktest to test for model specification"
 	di "* if p of _hatsq < 0.05 -> mis-spec"
 	di "* http://www.ats.ucla.edu/stat/stata/webbooks/logistic/chapter3/statalog3.htm"
 	linktest	
@@ -131,7 +135,7 @@ foreach v of local vars {
 		estat ovtest
 		estat hettest
 		
-		di "* -> `v' `pt`p'' linktest"
+		di "* -> `v' `pt`p'' linktest to test for model specification"
 		di "* if p of _hatsq < 0.05 -> mis-spec"
 		di "* http://www.ats.ucla.edu/stat/stata/webbooks/logistic/chapter3/statalog3.htm"
 		linktest	
@@ -162,8 +166,8 @@ foreach v of local vars {
 estout rlog_Gcons2012 using "`rpath'/NEED-EULF-2014-log-gas-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 estout rlog_Gcons2012q* using "`rpath'/NEED-EULF-2014-log-gas-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 estout rlog_Gcons2012_* using "`rpath'/NEED-EULF-2014-log-gas-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout rlog_Econs2012 using "`rpath'/NEED-EULF-2014-log-elec-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 N ll)
-estout rlog_Econs2012q* using "`rpath'/NEED-EULF-2014-log-elec-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 N ll)
+estout rlog_Econs2012 using "`rpath'/NEED-EULF-2014-log-elec-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
+estout rlog_Econs2012q* using "`rpath'/NEED-EULF-2014-log-elec-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 estout rlog_Econs2012_* using "`rpath'/NEED-EULF-2014-log-elec-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 estout rlog_Allcons2012 using "`rpath'/NEED-EULF-2014-log-energy-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 estout rlog_Allcons2012q* using "`rpath'/NEED-EULF-2014-log-energy-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
diff --git a/NEED/process-NEED-EULF-2014.do b/NEED/process-NEED-EULF-2014.do
index f4d521ec3f39ec3d525d7c874375bad72f86babd..371035251f0d2534f00be7370b5f872e7cf469b5 100644
--- a/NEED/process-NEED-EULF-2014.do
+++ b/NEED/process-NEED-EULF-2014.do
@@ -33,6 +33,7 @@ local proot "`home'/Work/Data/Social Science Datatsets/DECC"
 local dpath "`proot'/NEED/End User Licence File 2014/"
 * NB this is the 2014 EULF we're using
 local ifile "need_eul_may2014"
+* original data file
 local dfile_orig "`dpath'UKDA-7518-stata11/stata11/`ifile'.dta"
 local version "v1"
 
@@ -40,7 +41,8 @@ set more off
 
 log using "`dpath'/processed/process-NEED-EULF-2014-`version'-$S_DATE.smcl", replace name(main)
 
-* create fcodebook & some descriptives
+* use these locals to control which sections of this do-file run (set to 1 to run a section, 0 to skip it)
+* create codebook & some descriptives
 local create_codebook = 1
 * create wide form fixed file with (supposedly) unchanging data & a seperate 'wide' consumption data file for cross-sectional analysis
 local create_xwavefile = 1
@@ -48,7 +50,7 @@ local create_xwavefile = 1
 local create_longfile = 0
 
 if `create_codebook' {
-	* create the file with data that (notionally) doesn't change
+	* create the codebook
 	log off main
 	log using "`dpath'/processed/codebook-NEED-EULF-2014-`version'-$S_DATE.smcl", replace name(cb)
 	use "`dfile_orig'", clear
@@ -82,7 +84,7 @@ if `create_xwavefile' {
 	replace FP_ENG = . if LOFT_DEPTH  == 99
 	replace LOFT_DEPTH = . if LOFT_DEPTH  == 99
 	
-	* no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?
+	* unresolved: what do G, L & M mean in the gas 'valid' variables? Presumably 0 = off gas & V = valid - check against the NEED EULF documentation
 	
 	tabstat IMD_WALES, by(REGION) s(mean min max n)
 	* there seem to be some welsh LSOAs allocated to English GORs?
@@ -149,7 +151,7 @@ if `create_longfile' {
 }
 
 /*
-* THIS TAKES AGES and creates a 1.5 GB file!!!
+* THIS TAKES AGES and creates a 1.5 GB file - use with care
 * now just merge them
 * start with long file which may or may not have just been re-created
 use "`dpath'/`dfile'_consumptionfile_long.dta", clear