diff --git a/ONS-UK-EFS-time-series-extract.do b/ONS-UK-EFS-time-series-extract.do index 917f3f56e6fa1e0abe416e94e2f9178ebcff9b4f..16ece5c03db4ea87093f856629002c169917e18a 100755 --- a/ONS-UK-EFS-time-series-extract.do +++ b/ONS-UK-EFS-time-series-extract.do @@ -1,4 +1,4 @@ -********************************************************************* +/********************************************************************* * Extracting variables from EFS 2001/2 onwards to produce a 'harmonised' microdata time series * mainly for use as input to a number of spatial microsimulation models to produce * small area estimates of consumption. The variables names: @@ -6,32 +6,13 @@ * * UK Expenditure Food Survey (as was) now Living Costs & Food Survey available from: * - http://discover.ukdataservice.ac.uk/series/?sn=2000028 -********************************************************************* -/************************ - -Copyright (c) 2002-2012 University of Essex, (c) 2012-2015 University of Southampton - -Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) - [Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton] - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License -(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -#YMMV - http://en.wiktionary.org/wiki/YMMV - -************************/ - -*********************** * Processes the EFS/LCFS into a set of identical files and then merges them -* NB it treats the Living Costs and Food Survey 2008 onwards as simply another EFS (which it is) +* Treats the Living Costs and Food Survey 2008 onwards as simply another EFS (which it is) + +* From 2006 the EFS is collected on a calendar year basis. +* This means that 2005-6_Q4 and 2006_Q1 are exactly the same cases +* This script removes 2006_Q1 to avoid duplication * History * 15/8/2012 moved creation of ba_quarter & income equivalisation, quartiles/deciles to per-year processing @@ -42,24 +23,41 @@ GNU General Public License for more details. * 29/4/2015 - changed to only produce basic file on the assumption that this will be linked back to source expenditure data for specific analyses * 2/5/2015 - this does not really work that well as (for example) variable names went to capital letters in 2010 (why why why) * so a mechanism has been included to allow the extraction of bespoke variables sets once this script has set all variable names to lower case - -* NB - the script assumes a certain folder structure for the source EFS data like so: -* `efsd'/<year>/stata/<datafile>.dta -* and also that this folder exists: -* `efsd'/processed/ -*********************** - +* +* Copyright (c) 2002-2012 University of Essex, (c) 2012-2015 University of Southampton +* +* Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) +* [Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton] +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License +* (http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* #YMMV - http://en.wiktionary.org/wiki/YMMV +**********************************************************************/ + +* run without waiting for user set more off +* set these to what works for you local place = "/Users/ben/Documents/Work" local efsd = "`place'/Data/Social Science Datatsets/Expenditure and Food Survey" local logd = "`efsd'/log_files" local outd = "`efsd'/processed" +************************ +* NB - the script assumes a certain folder structure for the source EFS/LCFS data like so: +* `efsd'/<year>/stata/<datafile>.dta +* You may have to rename some of the downloaded & unzipped UKDA data folders to make this work +* The script also assumes that this folder exists for the final results: +* `efsd'/processed/ +*********************** 
-* This means that 2005-6_Q4 and 2006_Q1 are exactly the same cases -* This script removes 2006_Q1 later on to avoid duplication - -local years "2001-2012" // just a label +local extract_years "2001-2012" // just a name for the FINAL extracted file * To save time you can leave out years you have already processed * just paste the ones you want into the do_years local variable below @@ -68,9 +66,9 @@ local do_years = "2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 20 * set to 1 to refresh each yearly extract you listed in do_years & append the files * set to 0 to just append previously extracted files -local do_extracts 0 +local do_extracts 1 -************ +************************ * Set the variables to be extracted here * there is error checking below to make sure that they exist in each year or to skip if not * put case in each keep var if otherwise empty @@ -92,10 +90,9 @@ local rawhh_keepvars = "flydes*" // flights - NB see 2012 fix below in rawhh sec * -> rawper file local rawper_keepvars = "case" +************************ -************ - -************ +************************ * set logging capture log close log using "`logd'/ONS-UK-EFS-time-series-extract-$S_DATE.smcl", replace @@ -406,7 +403,7 @@ if `do_extracts' { gen ba_adults = g018 if g018 > 0 * ignore hhs with no adults (how many are there?) - /* 1st adult = .67 spouse = .33 other 2nd adult = .33 3rd adult = .33 subsequent adults = .33 children aged < 14 = .2 children aged 14+ = .33 */ * catch hh with no children replace ba_under14 = 0 if ba_under14 == . replace ba_over14 = 0 if ba_over14 == . 
gen oecd_equivbhcwt = 0.67 if ba_adults >= 1 replace oecd_equivbhcwt = oecd_equivbhcwt + ((ba_adults-1) * 0.33) + (ba_under14 * 0.2) + (ba_over14 * 0.33) di "Calculating equiv income (OECD) and quartiles/deciles" + /* 1st adult = .67 spouse = .33 other 2nd adult = .33 3rd adult = .33 subsequent adults = .33 children aged < 14 = .2 children aged 14+ = .33 */ * catch hh with no children replace ba_under14 = 0 if ba_under14 == . replace ba_over14 = 0 if ba_over14 == . gen oecd_equivbhcwt = 0.67 if ba_adults >= 1 replace oecd_equivbhcwt = oecd_equivbhcwt + ((ba_adults-1) * 0.33) + (ba_under14 * 0.2) + (ba_over14 * 0.33) di "*-> Calculating equiv income (OECD) and quartiles/deciles" * p344, p389 & p396 changed to *p after 2006 and top coded (!) if `y' > 2005 { @@ -529,7 +526,7 @@ aorder order case* survey* -save "`outd'/EFS-`years'-extract-reduced-BA.dta", replace +save "`outd'/EFS-`extract_years'-extract-BA.dta", replace di "************************************************************************" if `do_extracts' {