diff --git a/NEED/analyse-NEED-EULF-2014-descriptives.do b/NEED/analyse-NEED-EULF-2014-descriptives.do
index 2b0b524b69214b2acc14d8a566c4b086bb043a53..f329ad2f9ee1498a17314bb88e4af3de496ed084 100644
--- a/NEED/analyse-NEED-EULF-2014-descriptives.do
+++ b/NEED/analyse-NEED-EULF-2014-descriptives.do
@@ -40,6 +40,8 @@ local dpath "`proot'/NEED/End User Licence File 2014/processed"
 local rpath "`proot'/results/NEED"
 
 local version "v1.1"
+* set sample label - used in graph names and exported file names
+local sample "100pc"
 
 * quick tests for 2012
 local do_2012_desc = 0
@@ -65,6 +67,7 @@ if `do_2012_desc' {
 	* processor intensive
 	local vars "Econs2012 Gcons2012"
 	local tvars "EE_BAND FLOOR_AREA_BAND PROP_AGE"
+	
 	* test values for valid - check for valid 0s for example. This only happens for gas where:
 	* 100 < gcons < 250 so included but rounded to nearest 500 = 0
 	
@@ -80,7 +83,8 @@ if `do_2012_desc' {
 			tab `v' if `v' < 1000
 			if `do_graphs' {
+				* all exported graphs (histogram and box plot) go in the graphs subfolder of the results path
 				histogram `v' if `v'Valid == "V", by(`tv') name(h_`tv'_`v'_`sample')
-				graph export "`rpath'/NEED-EULF-2014-histo_`v'_by_`tv'_`sample'_valid.png", replace 
+				graph export "`rpath'/graphs/NEED-EULF-2014-histo_`v'_by_`tv'_`sample'_valid.png", replace 
 			
 				graph box `v' if `v'Valid == "V", over(`tv') name(b_`tv'_`v'_`sample')
-				graph export "`rpath'/NEED-EULF-2014-box_`v'_by_`tv'_`sample'_valid.png", replace 
+				graph export "`rpath'/graphs/NEED-EULF-2014-box_`v'_by_`tv'_`sample'_valid.png", replace 
diff --git a/NEED/analyse-NEED-EULF-2014-models-v2.0.do b/NEED/analyse-NEED-EULF-2014-models-v2.0.do
index e656ad58b2b0a61674a0a50806c904ae02cefa63..164c37769a260a7f256e0a46069eec634d6b789b 100644
--- a/NEED/analyse-NEED-EULF-2014-models-v2.0.do
+++ b/NEED/analyse-NEED-EULF-2014-models-v2.0.do
@@ -39,28 +39,28 @@ local proot "`home'/Work/Data/Social Science Datatsets/DECC"
 local dpath "`proot'/NEED/End User Licence File 2014/processed"
 local rpath "`proot'/results/NEED"
 
-*local verrsion "1.0"
+*local version "1.0"
 * initial models - all households for electricity models
 
-*local verrsion "1.1"
+*local version "1.1"
 * restrict to gas only households to avoid complications of:
 * - primary electric heating (presumably)
 * - oil heating
 
-*local version "v2a_1pc"
+*local version "v2_1pc"
 *local sample 1
 *local sampleby "EE_BAND PROP_TYPE"
 * changed from using log consumption to consumption decile to avoid complications due to variable rounding ranges in original data (see readme)
 * restricted analysis to households where gas is main heat source as it is better predicted by variables included & is more relevant to EPC (heat)
 * uses 1% sample (c 30k) making sure keep proportions of property type and EE_Band to see if linktest fails with smaller n
 
-*local version "v2b_10pc"
+*local version "v2_10pc"
 *local sample 10
 *local sampleby "EE_BAND PROP_TYPE"
 * uses 10% sample (c 300k) making sure keep proportions of property type and EE_Band to see if margin plots and co-efficients are the same 
 * (linktest etc will probably now fail due to larger n)
 
-local version "v2c_full"
+local version "v2_100pc"
 local sample 100
 local sampleby "EE_BAND PROP_TYPE"
 * uses full sample (c 3m) to see if margin plots and co-efficients are the same 
@@ -71,14 +71,14 @@ set more off
 log using "`rpath'/analyse-NEED-EULF-2014-models-`version'-$S_DATE.smcl", replace
 
 * use the pre-processed wide form file which contains all years of consumption data but not the constant values which are in the xwave file
-use "`dpath'/need_eul_may2014_consumptionfile_wide.dta", clear
+use "`dpath'/need_eul_may2014_consumptionfile_wide_`sample'pc.dta", clear
 
 * we're going to use 2012 data only
 
 keep HH_ID *2012*
 
 * merge in the pre-processed cross-year fixed values file
-merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile.dta"
+merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile_`sample'pc.dta"
 
 * check what's valid
 tab Gcons2012Valid Econs2012Valid, mi // O = off gas, V = valid, L = too low, G = too big, M = missing
@@ -87,7 +87,7 @@ tabstat Gcons2012, by(Gcons2012Valid) s(mean min max n)
 tabstat Econs2012, by(Gcons2012Valid) s(mean min max n)
 
 histogram Gcons2012, by(MAIN_HEAT_FUEL, total) name(histo_Gcons2012)
-graph export "`rpath'/histo_Gcons2012_by_main_heating_fuel.png", replace
+graph export "`rpath'/graphs/histo_Gcons2012_by_main_heating_fuel_`version'.png", replace
 
 tabstat Gcons2012, by(MAIN_HEAT_FUEL) s(n mean min max)
 
@@ -157,7 +157,7 @@ local pt101 "detached"
 local pt102 "semi"
 local pt103 "end_terr"
 local pt104 "mid_terr"
-local pt105 "bung"
+local pt105 "bungalow"
 local pt106 "flat"
 
 * now loop over the energy types & run linear regression models
@@ -190,7 +190,7 @@ foreach v of local vars {
 	di "* test EPC margins for `v'"
 	margins EE_BAND
 	marginsplot, name(mplot_`v'_EE_BAND)
-	graph export "`rpath'/mplot_`v'_EE_BAND-`version'.png", replace
+	graph export "`rpath'/graphs/mplot_`v'_EE_BAND-`version'.png", replace
 	
 	* models by property type - to see if rsq & coefficients vary
 	foreach p of local ptypes {
@@ -213,7 +213,7 @@ foreach v of local vars {
 		di "* test EPC margins for `v' (`pt`p'')"
 		margins EE_BAND
 		marginsplot, name(mplot_`v'_EE_BAND_`pt`p'')
-		graph export "`rpath'/mplot_`v'_EE_BAND_`pt`p''-`version'.png", replace
+		graph export "`rpath'/graphs/mplot_`v'_EE_BAND_`pt`p''-`version'.png", replace
 
 	}
 	* models for different consumption quintiles - to see if rsq & coefficients vary
@@ -241,11 +241,11 @@ foreach v of local vars {
 
 * output all the results - that's a lot of t tests!
 * we could put them all out in one file but it would be really hard to find the ones you want!
-estout lg2012 using "`rpath'/NEED-EULF-2014-log-gas-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout lg2012_* using "`rpath'/NEED-EULF-2014-log-gas-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
+estout lg2012 using "`rpath'/models/NEED-EULF-2014-log-gas-model-`version'.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
+estout lg2012_* using "`rpath'/models/NEED-EULF-2014-log-gas-models-by-property-type-`version'.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 
-estout g2012dec using "`rpath'/NEED-EULF-2014-gas-deciles-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout g2012dec_* using "`rpath'/NEED-EULF-2014-gas-deciles-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
+estout g2012dec using "`rpath'/models/NEED-EULF-2014-gas-deciles-model-`version'.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
+estout g2012dec_* using "`rpath'/models/NEED-EULF-2014-gas-deciles-models-by-property-type-`version'.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 
 di "* Done!"