diff --git a/NEED/analyse-NEED-EULF-2014-descriptives.do b/NEED/analyse-NEED-EULF-2014-descriptives.do
index 6350117c77c0a39773a72fdfe62c9607e48e99a3..0aa7a3ca12e53844f4235d06204e02b3e3f8e463 100644
--- a/NEED/analyse-NEED-EULF-2014-descriptives.do
+++ b/NEED/analyse-NEED-EULF-2014-descriptives.do
@@ -39,6 +39,7 @@ if `do_2014_desc' {
 	use "`proot'/NEED/End User Licence File 2014/UKDA-7518-stata11/stata11/need_eul_may2014.dta", clear
 	
 	* distributions for 2012 (to test)
+	* processor intensive
 	local vars "Econs2012 Gcons2012"
 	local tvars "EE_BAND FLOOR_AREA_BAND PROP_AGE"
 	foreach v of local vars {
@@ -56,6 +57,13 @@ if `do_long_desc' {
 	* Now use the pre-processed long form file which contains all years of consumption data but not the constant values (housing charactersitics etc) which are in the xwave file
 	use "`dpath'/need_eul_may2014_longfile.dta", clear
 	
+	* set as panel - default delta of 1 (same form as the xtset in process-NEED-EULF-2014.do)
+	xtset HH_ID year
+
+	xtdescribe 
+	
+	xtsum Econs Gcons
+	
 	* summarise Electricity
 	table EconsValid year, c(count Econs min Econs mean Econs max Econs)
 	* summarise Gas
diff --git a/NEED/process-NEED-EULF-2014.do b/NEED/process-NEED-EULF-2014.do
index 8c7a4efb59114d1cfdb218b0a4a8a0ecc1ba7cc0..c182c73a3fef258262532077585e14e3fa5f99dd 100644
--- a/NEED/process-NEED-EULF-2014.do
+++ b/NEED/process-NEED-EULF-2014.do
@@ -35,7 +35,17 @@ local dpath "`proot'/NEED/End User Licence File 2014/"
 local ifile "need_eul_may2014"
 * original data file
 local dfile_orig "`dpath'UKDA-7518-stata11/stata11/`ifile'.dta"
-local version "v1"
+
+* 10 = 10% sample, 50 = 50% sample, 100 = 100% sample
+local sample 10
+local samplet "`sample'pc"
+local sampleby "EE_BAND PROP_TYPE"
+
+local version "v1.1"
+* includes production of % samples which maintain the original dimensions used to 
+* produce the EULF samples: EE_BAND PROP_TYPE
+
+*local version "v1"
 
 set more off
 
@@ -43,17 +53,20 @@ log using "`dpath'/processed/process-NEED-EULF-2014-`version'-$S_DATE.smcl", rep
 
 * use these locals to control what happens (set to 0 to skip the code)
 * create codebook & some descriptives
-local create_codebook = 1
+local create_codebook = 0
 * create wide form fixed file with (supposedly) unchanging data & a seperate 'wide' consumption data file for cross-sectional analysis
 local create_xwavefile = 1
 * create long form file with wave (yearly) data - be careful, this take a long time due to large memory use!
-local create_longfile = 0
+local create_longfile = 1
+
+* load the original file
+use "`dfile_orig'", clear
 
 if `create_codebook' {
-	* create the codebook
+	* create original EULF codebook
+	* not much point running this for each % sample, although the counts etc. reported in the codebook won't match the % sample files
 	log off main
-	log using "`dpath'/processed/codebook-NEED-EULF-2014-`version'-$S_DATE.smcl", replace name(cb)
-	use "`dfile_orig'", clear
+	log using "`dpath'/processed/codebook-NEED-EULF-2014-`version'-`sample'pc-$S_DATE.smcl", replace name(cb)
 	desc
 	di "** no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?"
 	codebook
@@ -61,13 +74,21 @@ if `create_codebook' {
 	log on main
 }
 
+***** random sample ****
+* select a random sample but ensure proportions of sampleby are kept; seed is fixed so re-runs draw the same households
+set seed 7518
+di "* Keeping `sample'% sample by `sampleby'"
+sample `sample', by(`sampleby')
+
+tab `sampleby', mi
+
 if `create_xwavefile' {
 	* create the file with data that (notionally) doesn't change
-	use "`dfile_orig'", clear
+
 	* create a wide consumption file
 	preserve
 		keep HH_ID Gcons* Econs*
-		save "`dpath'/processed/`ifile'_consumptionfile_wide.dta", replace
+		save "`dpath'/processed/`ifile'_consumptionfile_wide_`samplet'.dta", replace
 	restore
 	drop Gcons* Econs*
 	
@@ -114,12 +135,12 @@ if `create_xwavefile' {
 	
 	compress
 	log off main
-	log using "`dpath'/processed/codebook-NEED-EULF-2014-`ifile'_xwavefile-`version'-$S_DATE.smcl", replace name(cb_xwave)
+	log using "`dpath'/processed/codebook-NEED-EULF-2014-`ifile'_xwavefile-`version'-`samplet'-$S_DATE.smcl", replace name(cb_xwave)
 	desc
 	di "** no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?"
 	di "** urban/rural would be helpful"
 	codebook
-	save "`dpath'/processed/`ifile'_xwavefile.dta", replace
+	save "`dpath'/processed/`ifile'_xwavefile_`samplet'.dta", replace
 	log close cb_xwave
 	log on main
 }
@@ -127,7 +148,7 @@ if `create_xwavefile' {
 if `create_longfile' {
 	* create the long file with as few vars as possible (quicker)
 	* still takes a while...
-	use "`dfile_orig'.dta", clear
+	if `create_xwavefile' use "`dpath'/processed/`ifile'_consumptionfile_wide_`samplet'.dta", clear // xwave step dropped the consumption vars: reload the sampled wide file it saved
 	keep HH_ID Gcons* Econs*
 
 	* panel vars:
@@ -143,24 +164,24 @@ if `create_longfile' {
 			drop `v'`y'Valid
 		}
 	}
-	* this takes a LONG time - avoid running many times!
+	* this takes a LONG time for the full dataset
 	reshape long Gcons GconsValid Econs EconsValid, i(HH_ID)
 	rename _j year
 	* set as panel
-	xtset HH_ID year, delta(1 year)
+	xtset HH_ID year
 	compress
-	save "`dpath'/processed/`ifile'_consumptionfile_long.dta", replace
+	save "`dpath'/processed/`ifile'_consumptionfile_long_`samplet'.dta", replace
 }
 
 /*
-* THIS TAKES AGES and creates a 1.5 GB file - use with care
+* THIS TAKES AGES and creates a 1.5 GB file for the full dataset - use with care
 * now just merge them
 * start with long file which may or may not have just been re-created
 use "`dpath'/`dfile'_consumptionfile_long.dta", clear
 
 merge m:1 HH_ID using "`dpath'/`dfile'_xwavefile.dta"
 
-save "`dpath'/`dfile'_consumptionfile_long_complete.dta", replace
+save "`dpath'/`dfile'_consumptionfile_long_complete_`samplet'.dta", replace
 */
 
 log close _all