diff --git a/ONS-UK-EFS-time-series-extract.do b/ONS-UK-EFS-time-series-extract.do
index 555f80973f4b8c2a201934d4b02c66a530430f4a..a68ec3b4e17cdee12742507acdb5a3f5dd0304e0 100755
--- a/ONS-UK-EFS-time-series-extract.do
+++ b/ONS-UK-EFS-time-series-extract.do
@@ -31,15 +31,17 @@ GNU General Public License for more details.
 
 ***********************
 * Processes the EFS/LCFS into a set of identical files and then merges them
-* NB it treats the Living Costs and Food Survey 2008 as simply another EFS (which it is)
+* NB it treats the Living Costs and Food Survey 2008 onwards as simply another EFS (which it is)
 
 * History
-* 15/8/2012 moved creation of ba_quarter etc to per-year processing
+* 15/8/2012 moved creation of ba_quarter & income equivalisation, quartiles/deciles to per-year processing
 * 28/4/2015 - adding 2011 & 2012
 * 	LCFS database changes:
 * 		2010-2011 - 7272volume_h_changes_database_2011.xls
 * 		2011-2012 - 7472_volume_h_changes_database_2012.xls
 * 29/4/2015 - changed to only produce basic file on the assumption that this will be linked back to source expenditure data for specific analyses
+* 2/5/2015 - this does not really work that well as (for example) variable names went to capital letters in 2010 (why why why)
+*	so a mechanism has been included to allow the extraction of bespoke variable sets once this script has set all variable names to lower case
 
 * NB - the script assumes a certain folder structure for the source EFS data like so:
 * `efsd'/<year>/stata/<datafile>.dta
@@ -53,38 +55,49 @@ set more off
 local place = "/Users/ben/Documents/Work"
local efsd = "`place'/Data/Social Science Datatsets/Expenditure and Food Survey"
 local logd = "`efsd'/log_files"
local outd = "`efsd'/processed"
 
-* Years to be extracted
-* 2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011
 
 * NB: from 2006 the EFS is collected on a calendar year basis. 
 * This means that 2005-6_Q4 and 2006_Q1 are exactly the same cases
 * This script removes 2006_Q1 later on to avoid duplication
 
-* To save time you can leave out years you have already processed
-* just paste the ones you want into the allyears local variable below
-
 local years "2001-2012" // just a label
-* 2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011
-local all_years "2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012" // years to process
local do_years = "2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012"
 
-* set to 1 to refresh each yearly extract you listed in do_years
-local do_extracts 1
+* To save time you can leave out years you have already processed
+* just paste the ones you want into the do_years local variable below
+* choose any of 2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012 
+local do_years = "2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012" // years to process
+
+* set to 1 to refresh each yearly extract you listed in do_years & append the files
+* set to 0 to just append previously extracted files
+local do_extracts 0
 
-* drop the first survey for the merge as it is loaded first
-local mergeyears = "2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012"
+************
+* Set the variables to be extracted here
+* there is error checking below to make sure that they exist in each year or to skip if not
+* put case in any keepvars list that would otherwise be empty
 
-* be sure to keep only these from the dv hh file - these will be 'kept' (along with case and ba_* or c_* after the var names have been reduced to lower case
+* -> dvhh file
 * basic weighting & consumption expenditure codes
-local dvhh_keepvars = "weight* p60*t p61*t p630* p396*"
+local dvhh_keepvars = "p60*t p61*t p630* p396*"
 * needed for income equivalisation later
 local dvhh_keepvars = "`dvhh_keepvars' incanon a055 g018 g019 p116* p344* p389*"  	
 * DEMAND 2.3 (older people mobile lives)
-local dvhh_keepvars = "`dvhh_keepvars' b480 b481 b485 c96111* c96112* cc5413* c73311* c73312*"
+local dvhh_keepvars = "`dvhh_keepvars' b480 b481 b485 cc5413t c73312t" // overseas travel expenditures
+
+* -> dvper file
+local dvper_keepvars = "case"
 
-* keep these from rawhh file
+* -> rawhh file
 * DEMAND 2.3 (older people mobile lives)
-local rawhh_keepvars = "flydes*"
+local rawhh_keepvars = "flydes*" // flights - NB see 2012 fix below in rawhh section
+
+* -> rawper file
+local rawper_keepvars = "case"
 
+************
+
+************
+* set logging
 capture log close
 log using "`logd'/ONS-UK-EFS-time-series-extract-$S_DATE.smcl", replace
 
clear all
@@ -95,18 +108,19 @@ set maxvar 10000, perm
 di "*******************************************************"
 di "* 	This script will process all of the EFS files for:"
 di "*	`years'"
-di "*
-di "*	This could take some time. 
-di "*	I suggest you check it is running and then get a cup of tea....
+di "*	It will keep the following variables:"
+di "*	dvhh: `dvhh_keepvars' "
+di "*	rawhh: `rawhh_keepvars'"
+di "*	This could take some time. "
+di "*	I suggest you check it is running and then get a cup of tea...."
 di "*******************************************************"
 if `do_extracts' {
 	di "do_extracts = `do_extracts', all years (`years') to be extracted and refreshed"
 
 	foreach y of local do_years {
-		* create new constraints in FES data
 		di "* * * * * * * * * "
 		di "* -> Processing `y'"
-		/* constraint vars/labels
+		/* census vars/labels
 	
 		 * c_accom_0 c_accom_1 c_accom_2 c_accom_3 c_accom_4
 		 * 0 "Detached" 1 "Semi-detached" 2 "Terraced" 3 "flat/maisonette" 4 "other"
@@ -134,7 +148,8 @@ if `do_extracts' {
 		
		 * c_tenure_0	c_tenure_1	c_tenure_2	c_tenure_3 
		 * 0 "Owned" 1 "Rent from council" 2 "Social rent" 3 "Private rent incl rent free"
 		 
 		 * c_white_0	c_white_1	
		 * 0 "White HRP" 1 "Non-white HRP"	 
-		 
		 */
		
		*********
		di "* dv household file"
		use "`efsd'/`y'/stata/dvhh.dta", clear
		
+		 
		 */
		
		******************************
+		* dvhh
		di "* dv household file"
		use "`efsd'/`y'/stata/dvhh.dta", clear
		
 		* 2010 onwards data = mixed/uppercase
 		if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" {
 			rename *, lower	
@@ -148,13 +163,27 @@ if `do_extracts' {
 		
 		* has 1, 2 and more than 2 - so needs recoding.
		recode c_nchild (0=0) (1=1) (2/max=2)
		lab var c_nchild "Constraint: number of children"
		lab define c_nchild 0 None 1 One 2 "Two or more"
		lab val c_nchild c_nchild
		
 		gen ba_year = `y'
-		
		** Sorting
		sort case
 		
-		* keep only basic variables
-		 
-		keep case* ba_* c_* region `dvhh_keepvars'
-		qui: compress
		* save all vars
		save "`efsd'/`y'/stata/dvhh-temp.dta", replace
		
		******************************
		di "* raw household file for c_comp and c_accom"
-		di "* also to pick up `rawhh_keepvars'"
		
		use "`efsd'/`y'/stata/rawhh.dta", clear
		
+		* construct list of requested dvhh vars (wildcard patterns allowed) that exist in this year
+		* loop over the macro tokens: looping over a varlist aborts on a missing var before confirm can catch it
+		local keepvars = ""
+		foreach v of local dvhh_keepvars {
+			capture confirm variable `v'
+			if !_rc {
+				*di "* found `v'"
+				local keepvars = "`keepvars' `v'"
+            	}
+			else {
+				di in red "`v' does not exist in `y' - will be missing"
+            }
+        }
+        
+		di "* dvhh: keeping case* ba_* c_* region weight* `keepvars'"
+		keep case* ba_* c_* region weight* `keepvars'
+		qui: compress
		* save kept dvhh vars
		save "`efsd'/`y'/stata/dvhh-temp.dta", replace
		******************************
+		
		******************************
+		* rawhh
		di "* raw household file for: c_comp and c_accom"
+		di "* also to pick up: `rawhh_keepvars'"
		
		use "`efsd'/`y'/stata/rawhh.dta", clear
		
 		* 2010 onwards data = mixed/uppercase
 		if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" {
 			rename *, lower	
@@ -162,61 +191,48 @@ if `do_extracts' {
 		if survyr > 2001 {
 			* grr, why can't var names stay the same?!
 			gen acomtype = accom 
-		}
		replace ba_c_accom = 3 if acomtype == 2
		replace ba_c_accom = 4 if acomtype == 3
		replace ba_c_accom = 4 if acomtype == 4
		
		lab var ba_c_accom "Constraint: accommodation type"
		lab define c_accom 0 "Detached" 1 "Semi-detached" 2 "Terraced" 3 "flat/maisonette" 4 "other"
		lab val ba_c_accom c_accom
		
		** Composition.
		* Object =
		* 0 'married/partnered'
		* 1 'single parent'
		* 2 'single person'
		* 3 'other'.
		
		* co-habiting.
		gen ba_c_comp=1 if numcpart>0
		* married.
		replace ba_c_comp=0 if nummpart>0 
		* single parent - assumes a single adult living with 1 or more
		* children is a single parent.
		replace ba_c_comp=1 if (numadult==1 & numchild>0)
		* single person.
		replace ba_c_comp=2 if (numadult==1 & numchild==0)
		* the rest - this is a cheat!
		recode ba_c_comp (missing=3)
		lab var ba_c_comp "Constraint: household composition"
		lab define c_comp 0 "married/partnered" 1 "single parent" 2 "single person" 3 "other"
		lab val ba_c_comp c_comp
		tab ba_c_comp numcpart
		tab ba_c_comp nummpart
		
		gen ba_calyear = -1
-		
+		}
		replace ba_c_accom = 3 if acomtype == 2
		replace ba_c_accom = 4 if acomtype == 3
		replace ba_c_accom = 4 if acomtype == 4
		
		lab var ba_c_accom "Constraint: accommodation type"
		lab define c_accom 0 "Detached" 1 "Semi-detached" 2 "Terraced" 3 "flat/maisonette" 4 "other"
		lab val ba_c_accom c_accom
		
		** Composition.
		* Object =
		* 0 'married/partnered'
		* 1 'single parent'
		* 2 'single person'
		* 3 'other'.
		
		* co-habiting.
		gen ba_c_comp=1 if numcpart>0
		* married.
		replace ba_c_comp=0 if nummpart>0 
		* single parent - assumes a single adult living with 1 or more
		* children is a single parent.
		replace ba_c_comp=1 if (numadult==1 & numchild>0)
		* single person.
		replace ba_c_comp=2 if (numadult==1 & numchild==0)
		* the rest - this is a cheat!
		recode ba_c_comp (missing=3)
		lab var ba_c_comp "Constraint: household composition"
		lab define c_comp 0 "married/partnered" 1 "single parent" 2 "single person" 3 "other"
		lab val ba_c_comp c_comp
		tab ba_c_comp numcpart
		tab ba_c_comp nummpart
		
		
 		if "`y'" == "2001-2002" | "`y'" == "2002-2003" | "`y'" == "2003-2004" | "`y'" == "2004-2005" | "`y'" == "2005-2006" | "`y'" == "2006" | "`y'" == "2007" {
 			di "* Setting up cal year for `y'"
-			tab survyr sampyear, mi
+			* tab survyr sampyear, mi
 			gen ba_sampyear = sampyear
-			* ba_calyear removed as was actually sample year
-			
-			* construct list of vars to keep based on ideal
-			foreach v of local keepvars_orig {
-				capture confirm variable `v'
-				if !_rc {
-					*di in red "weight exists"
-					su `v'
-					local keepvars = "`keepvars' `v'"
-               	}
-				else {
-					*di in red "weight does not exist"
-               }
-            }
-			di "* Done setting up sample year for `y'"
+			lab var ba_sampyear "Sample year"
 		}
 
 		if  "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" {
 			di "* Setting up cal year for `y'"
 			* sampyear variable removed
+			* tab survyr sampyear, mi
 			gen ba_sampyear = survyr
-		    * construct list of vars to keep based on ideal
-			foreach v of local keepvars_orig {
-				capture confirm variable `v'
-				if !_rc {
-					*di in red "weight exists"
-					local keepvars = "`keepvars' `v'"
-               	}
-				else {
-					*di in red "weight does not exist"
-               }
-            }
 			lab var ba_sampyear "Sample year"
-			
-			di "* Done setting up sample year for `y'"
 		}
 		if  "`y'" == "2012" {
-			* appear to have set multiple flights varnames differently
+			di "* fixing issue with multiple flights varnames differently for `y'"
 			renpfix flydes_ flydest1
 		} 
-		keep case* ba_* `rawhh_keepvars'		
-		* Useful vars already 'kept'
-		* fix name
+		* construct list of requested rawhh vars (wildcard patterns allowed) that exist in this year
+		* loop over the macro tokens: looping over a varlist aborts on a missing var before confirm can catch it
+		local keepvars = ""
+		foreach v of local rawhh_keepvars {
+			capture confirm variable `v'
+			if !_rc {
+				* di "* found `v'"
+				local keepvars = "`keepvars' `v'"
+            	}
+			else {
+				di in red "`v' does not exist in `y' - will be missing"
+            }
+        }
+
+		di "* rawhh: keeping case* ba_* c_* `keepvars'"
 		renpfix ba_c_ c_
+		keep case* ba_* c_* `keepvars'		
 		
 		qui: compress
-		sort case
-		save "`efsd'/`y'/stata/rawhh-temp.dta", replace
				
		*************
-		*
+
+		save "`efsd'/`y'/stata/rawhh-temp.dta", replace
		******************************
+				
		******************************
+		* rawper
 		di "* Need rawper file for ethnicity detail"
 		use "`efsd'/`y'/stata/rawper.dta", clear
 		
@@ -270,11 +286,27 @@ if `do_extracts' {
 		lab def c_ethnicd 0 "Missing/inapplicable" 1 "White" 2 "Mixed" 3 "Asian" 4 "Black" 5 "Other"
 		lab val c_ethnicd c_ethnicd
 	
-		* keep only new variables
-		keep case* c_*
+		* construct list of requested rawper vars (wildcard patterns allowed) that exist in this year
+		* loop over the macro tokens: looping over a varlist aborts on a missing var before confirm can catch it
+		local keepvars ""
+		foreach v of local rawper_keepvars {
+			capture confirm variable `v'
+			if !_rc {
+				*di "* found `v'"
+				local keepvars = "`keepvars' `v'"
+            	}
+			else {
+				di in red "`v' does not exist in `y' - will be missing"
+            }
+        }
+
+		* keep rawper variables
+		keep case* c_* `keepvars'
 		save "`efsd'/`y'/stata/rawper-temp.dta", replace
+		******************************
 		
-		************
		di "* Need dvper file to count n children aged under 14 (for OECD equivalisation weight)"
+		******************************
+		* dvper
		di "* Need dvper file to count n children aged under 14 (for OECD equivalisation weight)"
 		
 
		use "`efsd'/`y'/stata/dvper.dta", clear
		
 		* 2010 data = mixed/uppercase
@@ -284,10 +316,26 @@ if `do_extracts' {
 		
 		* Ethnicity 
 		* NB a012 and a013 changed categories 2007->2008 to 0 = n/a, 1 = white, 2 = everyone else
-		* this still works, you can get detail from rawper.dta
		recode a012p (1=0) (2/max=1), gen(c_white)
		lab var c_white "Constraint: non-white HRP"
		lab def c_white 0 "White HRP" 1 "Non-white HRP"
		lab val c_white c_white
		* if HRPs are not classified - they will show up as missing
		keep case* c_white
+		* this still works, you can get detail from rawper.dta
		recode a012p (1=0) (2/max=1), gen(c_white)
		lab var c_white "Constraint: non-white HRP"
		lab def c_white 0 "White HRP" 1 "Non-white HRP"
		lab val c_white c_white
		* if HRPs are not classified - they will show up as missing
+		
+		* construct list of requested dvper vars (wildcard patterns allowed) that exist in this year
+		* loop over the macro tokens: looping over a varlist aborts on a missing var before confirm can catch it
+		local keepvars = ""
+		foreach v of local dvper_keepvars {
+			capture confirm variable `v'
+			if !_rc {
+				*di "* found `v'"
+				local keepvars = "`keepvars' `v'"
+            	}
+			else {
+				di in red "`v' does not exist in `y' - will be missing"
+            }
+        }
+
		keep case* c_white `keepvars'
 		
 		qui: compress
-		
		save "`efsd'/`y'/stata/dvper-temp.dta", replace
+		
		save "`efsd'/`y'/stata/dvper-temp.dta", replace
+		******************************
 		
 		************
		* Now c_lli but this time need to collapse it so we count the number in the household with/out lli
 		* and count the number of children of various ages
		
@@ -323,7 +371,7 @@ if `do_extracts' {
 		lab def c_lli 0 "No person with lli" 1 "At least 1 person with lli"
 		lab var ba_under14 "Number of children aged under 14"
 
-		* keep only new variables
+		* keep only new variables as we've collapsed to hh
 		keep case* ba_* c_*
 		qui: compress
 		save "`efsd'/`y'/stata/dvper-lli.dta", replace
@@ -394,8 +442,6 @@ if `do_extracts' {
 		* new:
 		* a099:
		*   1 january to march
		*   2 april to june
		*   3 july to september
		*   4 october to december
 	
-		*egen ba_calyear_quarter = concat(ba_calyear ba_quarter), punct("_Q")
-		
 		gen ba_month = a055
 		
 		* create a birth cohort variable
@@ -421,11 +467,11 @@ if `do_extracts' {
 		
 		tab ba_quarter
 		
-		egen ba_calyear_quarter = concat(ba_calyear ba_quarter), punct("_Q")
-		lab var ba_calyear_quarter "EFS/FES calendar year & quarter"
+		egen ba_sampyear_quarter = concat(ba_sampyear ba_quarter), punct("_Q")
+		lab var ba_sampyear_quarter "EFS/FES sample year & quarter"
 		
-		egen ba_calyear_month = concat(ba_calyear ba_month), punct("_")
-		lab var ba_calyear_month "EFS/FES calendar year & month"
+		egen ba_sampyear_month = concat(ba_sampyear ba_month), punct("_")
+		lab var ba_sampyear_month "EFS/FES sample year & month"
 		
 		gen survey_name = "efs"
 
@@ -444,21 +490,21 @@ if `do_extracts' {
 ****************************
 * now merge them all into one big file
 
-* start with 2001/2
-use "`outd'/EFS-2001-2002-extract-BA.dta", clear
+clear // start with nothing
 
-foreach y of local mergeyears {
+foreach y of local do_years {
 	di "Appending `y'"
 	qui: append using "`outd'/EFS-`y'-extract-BA.dta", force
 	*erase "`efsd'/`y'/FES-`y'-extract-BA.dta"
 }
 
+* the above code appends each yearly extract listed in do_years to an initially empty dataset
 tabstat c_*, c(s) s(mean min max)
 		
 lab var survey_year "EFS/FES year"
 
 * finally check for duplicate months 2005-6 -> 2006 samples
-tab ba_calyear ba_month
+tab ba_sampyear ba_month
 tab survey_year ba_month
 
 * Jan/Feb/Mar 2006 are duplicates of Jan/Feb/Mar 2005-6
@@ -467,7 +513,7 @@ drop if survey_year == "2006" & ba_month == 2
 drop if survey_year == "2006" & ba_month == 3
 
 * check
-tab ba_calyear ba_month
+tab ba_sampyear ba_month
 
 
 gen caseno = case
@@ -475,7 +521,7 @@ gen caseno = case
 * to test
 tabstat c_ncars c_nchild c_npersons c_nearners c_nrooms,  by(ba_sampyear)
 
-drop _merge* a055 g* p116t
+drop _merge* a055 g* p116t caseno
 
 di "*-> Compressing"
 qui: compress