diff --git a/ONS-UK-EFS-time-series-extract.do b/ONS-UK-EFS-time-series-extract.do
new file mode 100755
index 0000000000000000000000000000000000000000..de319e4f1ed992309606f5bf717506015d94f7b8
--- /dev/null
+++ b/ONS-UK-EFS-time-series-extract.do
@@ -0,0 +1,465 @@
+*********************************************************************
+* Extracting variables from EFS 2001/2 - 2010 to produce a 'harmonised' microdata time series
+* mainly for use as input to a number of spatial microsimulation models to produce
+* small area estimates of consumption. The variables names:
+*  - c_* are designed to match to the UK 2001 Census household table/variable definitions
+* 
+* UK Expenditure Food Survey (as was) now Living Costs & Food Survey available from:
+* - http://discover.ukdataservice.ac.uk/series/?sn=2000028
+*********************************************************************
+
+/************************
+
+Copyright (c) 2002-2012 University of Essex, (c) 2012-2014  University of Southampton
+
+Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
+	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License 
+(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+#YMMV - http://en.wiktionary.org/wiki/YMMV
+
+************************/
+
+***********************
+* Processes the EFS/LCFS into a set of identical files and then merges them
+* NB it treats the Living Costs and Food Survey 2008 as simply another EFS (which it is)
+
+* History
+* 15/8/2012 moved creation of ba_quarter etc to per-year processing
+
+* TO DO
+* update to 2012
+
+* NB - the script assumes a certain folder structure for the source EFS data like so:
+* `efsd'/<year>/stata/<datafile>.dta
+* and also that this folder exists:
+* `efsd'/processed/
+***********************
+
+set more off
+
+* /Users/benander/Documents/Sociology/CRESI/Data/Social Science Datatsets/Expenditure and Food Survey
+local place = "/Users/ben/Documents/Work"
local efsd = "`place'/Data/Social Science Datatsets/Expenditure and Food Survey"
+local logd = "`efsd'/log_files"
local outd = "`efsd'/processed"
+
+* Years to be extracted
+* 2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010
+
+* NB: from 2006 the EFS is collected on a calendar year basis. 
+* This means that 2005-6_Q4 and 2006_Q1 are exactly the same cases
+* This script removes 2006_Q1 later on to avoid duplication
+
+* To save time you can leave out years you have already processed:
+* just paste the ones you want into the do_years local below (the per-year loop reads do_years)
+
+local years "2001-2010" // just a label
+local all_years "2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010" // years to process
local do_years = "2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010"
+
+* set to 1 to refresh each yearly extract you listed in do_years
+local do_extracts "1"
+
+* drop the first survey for the merge as it is loaded first
+local mergeyears = "2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010"
+
+
+capture log close
+log using "`logd'/ONS-UK-EFS-time-series-extract-$S_DATE.smcl", replace
+
clear all
+
+* increase default number of variables allowed
+set maxvar 10000, perm
+
+di "*******************************************************"
+di "* 	This script will process all of the EFS files for:"
+di "*	`years'"
+di "*
+di "*	This could take some time. 
+di "*	I suggest you check it is running and then get a cup of tea....
+di "*******************************************************"
+if `do_extracts' {
+	di "do_extracts = `do_extracts', all years (`years') to be extracted and refreshed"
+
+	foreach y of local do_years {
+		* create new constraints in FES data
+		di "* * * * * * * * * "
+		di "* -> Processing `y'"
+		/* constraint vars/labels
+	
+		 * c_accom_0 c_accom_1 c_accom_2 c_accom_3 c_accom_4
+		 * 0 "Detached" 1 "Semi-detached" 2 "Terraced" 3 "flat/maisonette" 4 "other"
+	
		 * c_age_0	c_age_1	c_age_2	c_age_3	c_age_4	c_age_5	c_age_6
+		 * 0 "16-24" 1 "25-34" 2 "35-44" 3 "45-54" 4 "55-64" 5 "65-74" 6 "75+" 
+	
+		 * c_comp_0 c_comp_1 c_comp_2 c_comp_3 
		 * 0 "married/partnered" 1 "single parent" 2 "single person" 3 "other"
+		 
	 	 * c_lli_0	c_lli_1
		 * 0 "No person with lli" 1 "At least 1 person with lli"
+	
+		 * c_ncars_0 c_ncars_1 c_ncars_2
+		 * 0 None 1 One 2 "Two or more"
+		 
+		 * c_nchild_0 c_nchild_1 c_nchild_2
		 * 0 None 1 One 2 "Two or more"
+	
		 * c_nearners_0	c_nearners_1	c_nearners_2	c_nearners_3	
+		 * 0 "0" 1 "1" 2 "2" 3 "3+"
+		 
		  * c_npersons_0	c_npersons_1	c_npersons_2	c_npersons_3	c_npersons_4	
		 * 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+"
+		 
+		 * c_nrooms_0	c_nrooms_1	c_nrooms_2	c_nrooms_3	c_nrooms_4
+		 * 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+" 
+	
+		 * c_empl_0 c_empl_1 c_empl_2 c_empl_3 c_empl_4
		 * 0 "NS-SEC 1" 1 "NS-SEC 2" 2 "NS-SEC 3" 3 "Inactive" 4 "Retired"
+	
+	 	 * c_gender_0 c_gender_1
+		 * 0 "Male" 1 "Female"
+		
		 * c_tenure_0	c_tenure_1	c_tenure_2	c_tenure_3 
		 * 0 "Owned" 1 "Rent from council" 2 "Social rent" 3 "Private rent incl rent free"
+		 
+		 * c_white_0	c_white_1	
		 * 0 "White HRP" 1 "Non-white HRP"	 
+		 
		 */
		
		*********
		di "* dv household file"
		use "`efsd'/`y'/stata/dvhh.dta", clear
		
+		* 2010 data = mixed/uppercase variable names, so force them all to lower case
+		* so the recodes below can use the same names for every year
+		* (string equality in Stata is ==)
+		if "`y'" == "2010" {
+			rename *, lower	
+		}
		** sex of HRP
		gen c_gender = -1
		replace c_gender = 0 if sexhrp == 1
		replace c_gender = 1 if sexhrp == 2
		lab var c_gender "Constraint: Gender of HRP"
		lab def c_gender 0 "Male" 1 "Female"
		lab val c_gender c_gender
		
		** age of HRP
+		* need to use 75+ as few 80+ after 2001-2
		recode p396p (min/15= . ) (16/24 = 0) (25/34 = 1) (35/44 = 2) (45/54 = 3) ///
			(55/64 = 4) (65/74 = 5) (75/max = 6), gen(c_age)
		lab var c_age "Constraint: Age of HRP"
+		* NB for NI need to change these as Census categories are different. Why why why!?
		label define c_age ///
			0 "16-24" ///
			1 "25-34" ///
			2 "35-44" ///
			3 "45-54" ///
			4 "55-64" ///
			5 "65-74" ///
			6 "75+" 
			
		lab val c_age c_age
		
+		** number of rooms
		* 1, 2, 3, 4, 5+
		recode a114 (1=0) (2=1) (3=2) (4=3) (5/max=4), gen(c_nrooms)
		lab var c_nrooms "Constraint: number of rooms"
		lab def c_nrooms 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+" 
		lab val c_nrooms c_nrooms
	
		** Number of residents per household
		recode a049 (1=0) (2=1) (3=2) (4=3) (5/max=4), gen(c_npersons)
		lab var c_npersons "Constraint: number of persons in household (all ages)"
		lab def c_npersons 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+"
		lab val c_npersons c_npersons
		
		** Number of earners
		recode a054 (0=0) (1=1) (2=2) (3/max=3), gen(c_nearners)
		lab var c_nearners "Constraint: number of earners in household"
		lab def c_nearners 0 "0" 1 "1" 2 "2" 3 "3+"
		lab val c_nearners c_nearners
		
		** Cars and vans.
		* Object=
		* 0 None
		* 1 One
		* 2 'Two or more'.
		 
		*has 1-12 so need to recode.
		
		recode a124 (0=0) (1=1) (2/12=2), gen(c_ncars)
		lab var c_ncars "Constraint: cars and vans"
		lab define c_ncars 0 None 1 One 2 "Two or more"
		lab val c_ncars c_ncars 
		*tab a124 c_cars
		
		** Tenure.
		* Object =
		* 0 'Owned'
		* 1 'Rent from council'
		* 2 'Social rent'
		* 3 'Private rent' - incl rent-free
		
		*use a121.
		recode a121 (5/7=0) (1=1) (2=2) (3/4 8=3), gen(c_tenure)
		lab var c_tenure "Constraint: tenure"
		lab define c_tenure 0 "Owned" 1 "Rent from council" 2 "Social rent" 3 "Private rent incl rent free"
		lab val c_tenure c_tenure
		*tab a121 c_tenure
		
		** employment status.
		* Object =
		* 0 'NS-SEC 1'
		* 1 'NS-SEC 2'
		* 2 'NS-SEC 3'
		* 3 'Inactive'
		* 4 'Retired'.
		
		* need to combine these - a093 = activity, a094 = NS-SEC.
		* ref ONS website.
		
		* need to put a094 = 9,10,11 ('Never worked and long term unemployed',students, not stated) into 'inactive'
		recode a094 (0/2=0) (3/4=1) (5/8=2) (9/12=3), gen(c_empl)
		* the crosstab of a094 against a093 shows that some who are coded as
		* retired/unoccupied (a093=4/5) have an NS-SEC code as they are recently
		* retired/unoccupied (?). In this case we use the activity code not the NS-SEC code.
		
		replace c_empl=3 if a093==7
		replace c_empl=4 if a093==6
		
		lab var c_empl "Constraint: employment status of HRP"
		lab define c_empl 0 "NS-SEC 1" 1 "NS-SEC 2" 2 "NS-SEC 3" 3 "Inactive" 4 "Retired"
		lab val c_empl c_empl
		*tab a093 c_empl 
		*tab a094 c_empl
+				
		** Region.
		* use gorx.
		
		gen region = gorx
		label define region 	1    "North East"	2    "North West & Merseyside"	3    "Yorkshire and the Humber" ///
			4    "East Midlands" 	5    "West Midlands" 	6    "Eastern"	7    "London" 	8    "South East" ///
			9    "South West"	10    "Wales"	11    "Scotland" 	12    "Northern Ireland"
		lab var region "Govt. Office Region"
		lab val region region
		
		
		** Number of children - 16 or younger.
		*Object =
		* 0 0
		* 1 1
		* 2 2+.
		gen c_nchild = a040+a041+a042
+		
+		* could use g019?
+		
+		* has 1, 2 and more than 2 - so needs recoding.
		recode c_nchild (0=0) (1=1) (2/max=2)
		lab var c_nchild "Constraint: number of children"
		lab define c_nchild 0 None 1 One 2 "Two or more"
		lab val c_nchild c_nchild
		
		** Sorting
		sort case
+		
+		* keep all variables and base final merge on it
+		
+		qui: compress
		* save all vars
		save "`efsd'/`y'/stata/dvhh-temp.dta", replace
		
		******************************
		di "*  raw household file for c_comp and c_accom"
+		
+		* also to pick up electricity water payments periodicity etc for error analysis
		
		use "`efsd'/`y'/stata/rawhh.dta", clear
		
+		* 2010 data = mixed/uppercase variable names, so force them all to lower case
+		* (string equality in Stata is ==)
+		if "`y'" == "2010" {
+			rename *, lower	
+		}

		* accommodation type
		* Object=
		* 0 Detached
		* 1 Semi
		* 2 Terrace
		* 3 flat/maisonette
		* 4 other
		* -1 is the placeholder for cases matched by none of the rules below
		gen ba_c_accom = -1
		replace ba_c_accom = 0 if hsetype == 1
		replace ba_c_accom = 1 if hsetype == 2
		replace ba_c_accom = 2 if hsetype == 3
+		* NOTE(review): -if- used as a command evaluates survyr for the first
+		* observation only, so this relies on survyr being constant within a
+		* yearly file - confirm against the data
+		if survyr > 2001 {
+			* grr, why can't var names stay the same?!
+			gen acomtype = accom 
+		}
		replace ba_c_accom = 3 if acomtype == 2
		replace ba_c_accom = 4 if acomtype == 3
		replace ba_c_accom = 4 if acomtype == 4
		
		lab var ba_c_accom "Constraint: accommodation type"
		lab define c_accom 0 "Detached" 1 "Semi-detached" 2 "Terraced" 3 "flat/maisonette" 4 "other"
		lab val ba_c_accom c_accom
		
		** Composition.
		* Object =
		* 0 'married/partnered'
		* 1 'single parent'
		* 2 'single person'
		* 3 'other'.
		
		* The rules are applied in order, so later rules overwrite earlier ones.
		
		* co-habiting = partnered, so category 0 (not 1, which is 'single parent').
		* NOTE(review): in Stata a missing value compares as greater than any number,
		* so a missing numcpart/nummpart also satisfies "> 0" - confirm these
		* counts are never missing.
		gen ba_c_comp=0 if numcpart>0
		* married.
		replace ba_c_comp=0 if nummpart>0 
		* single parent - assumes a single adult living with 1 or more
		* children is a single parent.
		replace ba_c_comp=1 if (numadult==1 & numchild>0)
		* single person.
		replace ba_c_comp=2 if (numadult==1 & numchild==0)
		* the rest - this is a cheat! Anything still unclassified becomes 'other'.
		recode ba_c_comp (missing=3)
		lab var ba_c_comp "Constraint: household composition"
		lab define c_comp 0 "married/partnered" 1 "single parent" 2 "single person" 3 "other"
		lab val ba_c_comp c_comp
		* cross-check the derived categories against the partner counts
		tab ba_c_comp numcpart
		tab ba_c_comp nummpart
		
		
		* rawhh derived cable tv dummies
		* if tvtype = 3 = cable, if = 4 is cable + phone
		gen tvtype_2 = tvtype2
		gen tvtype_3 = tvtype3
		gen tvtype_4 = tvtype4
		* if exists - gen tvtype_5 = tvtype5
		egen ba_c_has_cable_rawhh = anymatch(tvtype_*), values(3 4)
		
		* check for tvtype(1) as well
		replace ba_c_has_cable_rawhh = 1 if tvtype == 3
		
		lab var ba_c_has_cable_rawhh "Sim: Has cable based on tvtype in rawhh"
+		
+		* ba_calyear is only a -1 placeholder in this section
+		gen ba_calyear = -1
+		* ba_sampyear has to exist before the per-year -replace- statements below
+		* can fill it (-replace- fails on a variable that has not been created)
+		gen ba_sampyear = -1
+		lab var ba_sampyear "Sample year"
+		
+		* wish list of utility payment variables; not every year has all of them
+		local keepvars_orig "waterpay watermet elecpay eacamt eacper elecpayo dveac estndordamt estndord estndo_1 estndo_2 dvestndo dsselecf dsselecp dwpelecf dwpelecp dwpper"
+		local keepvars ""
+	
+		if "`y'" == "2001-2002" | "`y'" == "2002-2003" | "`y'" == "2003-2004" | "`y'" == "2004-2005" | "`y'" == "2005-2006" | "`y'" == "2006" | "`y'" == "2007" {
+			di "* Setting up cal year for `y'"
+			tab survyr sampyear, mi
+			replace ba_sampyear = sampyear
+			* ba_calyear removed as was actually sample year
+			
+			* construct list of vars to keep based on ideal:
+			* only the wish-list variables that exist in this year's file are kept
+			foreach v of local keepvars_orig {
+				capture confirm variable `v'
+				if !_rc {
+					su `v'
+					local keepvars = "`keepvars' `v'"
+				}
+			}
+			di "***"
+			di "* Want: `keepvars_orig'"
+			di "* Have: `keepvars'"
+			di "***"
+			keep case ba_* wsinc water* sewsep ctwat percwat percsew percwsew ctwat `keepvars'
+			di "* Done setting up sample year for `y'"
+		}
+
+		if  "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" {
+			di "* Setting up cal year for `y'"
+			* sampyear variable removed
+			replace ba_sampyear = survyr
+			* construct list of vars to keep based on ideal:
+			* only the wish-list variables that exist in this year's file are kept
+			foreach v of local keepvars_orig {
+				capture confirm variable `v'
+				if !_rc {
+					local keepvars = "`keepvars' `v'"
+				}
+			}
+			di "***"	
+			di "* Want: `keepvars_orig'"
+			di "* Have: `keepvars'"
+			di "***"
+			keep case ba_* wsinc water* sewsep ctwat percwat percsew percwsew ctwat `keepvars'
+			di "* Done setting up sample year for `y'"
+		}
+				
+		* Useful vars already 'kept'
+		* fix name
+		renpfix ba_c_ c_
+		
+		qui: compress
+		sort case
+		save "`efsd'/`y'/stata/rawhh-temp.dta", replace
				
		*************
+		*
+		di "* Need rawper file for ethnicity detail"
+		use "`efsd'/`y'/stata/rawper.dta", clear
+		
+		* 2010 data = mixed/uppercase variable names, so force them all to lower case
+		* (string equality in Stata is ==)
+		if "`y'" == "2010" {
+			rename *, lower	
+		}
+		* keep HRPs
+		
+		tab hrpid, nol
+		
+		keep if hrpid == 1
+		* tag = number of other rows sharing the same case id (0 = unique)
+		duplicates tag case, gen(tag)
+		
+		di "* -> Any duplicates in `y' ?"
+		
+		* list every household that has more than one HRP row, however many copies
+		li case person hrpid sex dvage* if tag > 0
+		
+		* the age variable differs between years, so the under-16 filter does too
+		* NOTE(review): "2006-2007" is not one of the survey years listed at the
+		* top of this script - confirm whether that test is still needed
+		if "`y'" == "2005-2006" | "`y'" == "2006-2007" {
+			di "* Age error: `y'"
+			drop if dvage18 == 2
+		}
+		else if "`y'" == "2006" | "`y'" == "2007" | "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" {
+			di "Removing HRPs where age < 16 for `y'"
+			drop if dvage_p < 16
+		}
+		else {
+			di "Removing HRPs where age < 16 for `y'"
+			drop if dvage < 16
+		}
+		
+		* ethnic detail
+		
+		if "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" {
+			gen c_ethnicd = eth01p
+		}
+		else {
+			* before 2008
+			gen c_ethnicd = ethnic_p
+		}
+		lab var c_ethnicd "Detailed ethnic group"
+		lab def c_ethnicd 0 "Missing/inapplicable" 1 "White" 2 "Mixed" 3 "Asian" 4 "Black" 5 "Other"
+		lab val c_ethnicd c_ethnicd
+	
+		* keep only new variables
+		keep case c_*
+		save "`efsd'/`y'/stata/rawper-temp.dta", replace
+		
+		************
		di "* Need dvper file to count n children aged under 14 (for OECD equivalisation weight)"
+		
+
		use "`efsd'/`y'/stata/dvper.dta", clear
		
+		* 2010 data = mixed/uppercase variable names, so force them all to lower case
+		* (string equality in Stata is ==)
+		if "`y'" == "2010" {
+			rename *, lower	
+		}

		* keep only hrps
		keep if a003 == 1
+		
+		* Ethnicity 
+		* NB a012 and a013 changed categories 2007->2008 to 0 = n/a, 1 = white, 2 = everyone else
+		* this still works, you can get detail from rawper.dta
		recode a012p (1=0) (2/max=1), gen(c_white)
		lab var c_white "Constraint: non-white HRP"
		lab def c_white 0 "White HRP" 1 "Non-white HRP"
		lab val c_white c_white
		* NB - 20 HRPs are not classified - they will show up as missing
		keep case c_white a010
+		
+		qui: compress
+		
		save "`efsd'/`y'/stata/dvper-temp.dta", replace
+		
+		************
		* Now c_lli but this time need to collapse it so we count the number in the household with/out lli
+		* and count the number of children of various ages
		
+		use "`efsd'/`y'/stata/dvper.dta", clear
+	
+		* 2010 data = mixed/uppercase variable names, so force them all to lower case
+		* (string equality in Stata is ==)
+		if "`y'" == "2010" {
+			rename *, lower	
+		}
+		
+		* assume if someone is eligible for incapacity benefit then = lli
+		* even if not actually receiving
+		* in rawper dvincap = a + b
+		* in dvper a227 (1 = rec) = a a227 (2=not rec) = b
+		* ought to check prevalence with FRS
+		* NOTE(review): a missing a227 also satisfies "> 0" in Stata - confirm
+		* a227 is never missing, otherwise those persons are counted as lli
+		gen c_lli_sum = 0
+		replace c_lli_sum = 1 if a227 > 0
+		
+		**********
+		* need to count children of ages < 14 for OECD equivalence scale
+		* can then calculate n over 14 by subtracting from (a040 + a041 + a042)
+		
+		gen ba_under14 = 0
+		replace ba_under14 = 1 if a005p  < 14
+		
+		* collapse to count the incidence of lli & count the number of children in each age group
+		* case is the household id
+		collapse (sum) c_lli_sum ba_*, by(case)
+		* turn the per-household count into a 0/1 presence flag
+		gen c_lli = 0
+		replace c_lli = 1 if c_lli_sum > 0
+		drop c_lli_sum
+		lab var c_lli "Constraint: presence of LLI"
+		lab def c_lli 0 "No person with lli" 1 "At least 1 person with lli"
+		* attach the value label so c_lli is labelled like the other constraints
+		lab val c_lli c_lli
+		lab var ba_under14 "Number of children aged under 14"
+
+		* keep only new variables
+		keep case ba_* c_*
+		qui: compress
+		save "`efsd'/`y'/stata/dvper-lli.dta", replace
+	
		
		di "*** MERGE ALL FILES for year = `y'"
+		
+		*
+			
		qui: merge case using ///
			"`efsd'/`y'/stata/dvhh-temp.dta" ///
+			"`efsd'/`y'/stata/rawper-temp.dta" ///
			"`efsd'/`y'/stata/dvper-temp.dta" ///
+			"`efsd'/`y'/stata/dvper-lli.dta" ///
			"`efsd'/`y'/stata/rawhh-temp.dta" , sort
+		
+		su _merge*
+			
+		* DELETE TEMPORARY FILES
+		erase "`efsd'/`y'/stata/dvhh-temp.dta"
+		erase "`efsd'/`y'/stata/rawper-temp.dta"
+		erase "`efsd'/`y'/stata/dvper-temp.dta"
+		erase "`efsd'/`y'/stata/dvper-lli.dta"
+		erase "`efsd'/`y'/stata/rawhh-temp.dta"
+		
		gen survey_year = "`y'"
+		tab survey_year
+			
+		aorder
+		* keep all (makes life easier below as some vars are in some years and not others) 
+		
+		***********************************************
+		* calculate OECD equivalisation weight in order to equivalise income or expenditure
+		* see e.g. DWP HBAI reports
+		* can then calculate n over 14 by subtracting from g019 
+		
+		gen ba_over14 = 0
+		replace ba_over14 = g019 - ba_under14
+		
+		gen ba_adults = g018 if g018 > 0
+		* ignore hhs with no adults (how many are there?)
+		
+		/*
		1st adult = .67
		spouse = .33
		other 2nd adult = .33
		3rd adult = .33
		subsequent adults = .33
		children aged < 14 = .2
		children aged 14+ = .33
		*/
		* catch hh with no children
		replace ba_under14 = 0 if ba_under14 == .
		replace ba_over14 = 0 if ba_over14 == .
		
		gen oecd_equivbhcwt = 0.67 if ba_adults >= 1
		replace oecd_equivbhcwt = oecd_equivbhcwt + ((ba_adults-1) * 0.33) + (ba_under14 * 0.2) + (ba_over14 * 0.33)
		di "Calculating equiv income (OECD)"
+		* p344 & p389 changed to *p after 2006 and top coded (!)
+		if "`y'" == "2001-2002" | "`y'" == "2002-2003" | "`y'" == "2003-2004" | "`y'" == "2004-2005" | "`y'" == "2005-2006" | "`y'" == "2006" {
+			gen equiv_p344 = p344/oecd_equivbhcwt
+			gen equiv_p389bhc = p389/oecd_equivbhcwt
+			gen equiv_p389ahc = (p389-p116t)/oecd_equivbhcwt
+		}
+		else {
+			gen equiv_p344 = p344p/oecd_equivbhcwt
+			gen equiv_p389bhc = p389p/oecd_equivbhcwt
+			gen equiv_p389ahc = (p389p-p116t)/oecd_equivbhcwt
+
+		}
+		lab var equiv_p344 "Equivalised normal gross household income (OECD)"
+		lab var equiv_p389bhc "Equivalised normal disposable (net) household income (BHC, OECD)"
+		lab var equiv_p389ahc "Equivalised normal disposable (net) household income (AHC, OECD)"
+	
+		
+		* quarter labels changed in 2006
+		
+		* old:
+		* 	1	april to june
		* 	2	june to september
		* 	3	october to december
		* 	4	january to march
+		
+		* new:
+		* a099:
		*   1 january to march
		*   2 april to june
		*   3 july to september
		*   4 october to december
+	
+		*egen ba_calyear_quarter = concat(ba_calyear ba_quarter), punct("_Q")
+		
+		gen ba_month = a055
+		
+	
		***********************
+		* End of per-year processing
+		
+		* calendar quarter from month of interview; any month outside 1-12
+		* (including missing) keeps the -1 placeholder
+		gen ba_quarter = -1
+		replace ba_quarter = 1 if inlist(ba_month, 1, 2, 3)
+		replace ba_quarter = 2 if inlist(ba_month, 4, 5, 6)
+		replace ba_quarter = 3 if inlist(ba_month, 7, 8, 9)
+		replace ba_quarter = 4 if inlist(ba_month, 10, 11, 12)
+		
+		tab ba_quarter
+		
+		egen ba_calyear_quarter = concat(ba_calyear ba_quarter), punct("_Q")
+		lab var ba_calyear_quarter "EFS/FES calendar year & quarter"
+		
+		egen ba_calyear_month = concat(ba_calyear ba_month), punct("_")
+		lab var ba_calyear_month "EFS/FES calendar year & month"
+		
+		gen survey_name = "efs"
+
+		gen uk_country = 1 if region > 0 & region < 10
+		replace uk_country = 2 if region == 10
+		replace uk_country = 3 if region == 11
+		replace uk_country = 4 if region == 12
+		lab def uk_country 1 "England" 2 "Wales" 3 "Scotland" 4 "Northern Ireland"
+		lab val uk_country uk_country
+
+		
+		qui: compress
+		save "`outd'/EFS-`y'-extract-BA.dta", replace
	}
+}
+
+****************************
+* now merge them all into one big file
+
+* start with 2001/2
+use "`outd'/EFS-2001-2002-extract-BA.dta", clear
+
+foreach y of local mergeyears {
+	di "Appending `y'"
+	qui: append using "`outd'/EFS-`y'-extract-BA.dta", force
+	*erase "`efsd'/`y'/FES-`y'-extract-BA.dta"
+}
+
+tabstat c_*, c(s) s(mean min max)
+		
+lab var year "Calendar year"
lab var survey_year "EFS/FES year"
+
+* finally check for duplicate months 2005-6 -> 2006 samples
+tab ba_calyear ba_month
+tab survey_year ba_month
+
+* Jan/Feb/Mar 2006 are duplicates of Jan/Feb/Mar 2005-6
+drop if survey_year == "2006" & ba_month == 1
+drop if survey_year == "2006" & ba_month == 2
+drop if survey_year == "2006" & ba_month == 3
+
+* check
+tab ba_calyear ba_month
+
+di "Compressing"
+qui: compress 
+
+* drop fs* as only exist in 2005-6
+drop fs*
+ 
+aorder
+
+* full version
+save "`outd'/EFS-`years'-extract-BA.dta", replace
+
+* use this version to match to case in older files for specific variable input
+keep case survey_year year gorx incanon c_* ba_* weight*
+
+gen caseno = case
+
+* report whether the yearly extracts were rebuilt in this run
+* (Stata requires the open brace on the same line as -if- / -else-)
+if `do_extracts' {
+	di "do_extracts = `do_extracts', all years (`years') extracted and refreshed"
+}
+else {
+	di "do_extracts = `do_extracts', years not extracted so individual files not refreshed"
+}
+
+su c_*
+
+save "`outd'/EFS-`years'-extract-reduced-BA.dta", replace
+
di "Job ended at $S_DATE"
+
+log close
diff --git a/README.md b/README.md
index e666001143a4c9ee318ef5329f22616af1ed6205..cf9942a806c704b92a778163d393cc16d8fac89b 100644
--- a/README.md
+++ b/README.md
@@ -3,3 +3,10 @@ UK Living Costs & Food Survey (LCFS)
 
 Collection of general scripts to process the UK Expenditure &amp; Food Survey (as was) now Living Costs &amp; Food Survey available from:
  * http://discover.ukdataservice.ac.uk/series/?sn=2000028
+
+### Terms of Use
+GPL: V2 - http://choosealicense.com/licenses/gpl-2.0/
+
+See license file for details.
+
+[YMMV](http://en.wiktionary.org/wiki/YMMV)
\ No newline at end of file