Skip to content
Snippets Groups Projects
Commit fa52d4d8 authored by Ben Anderson's avatar Ben Anderson
Browse files

updated extraction script to include 2013

parent bc03acf8
No related branches found
No related tags found
No related merge requests found
...@@ -62,12 +62,12 @@ local outd = "`efsd'/processed" ...@@ -62,12 +62,12 @@ local outd = "`efsd'/processed"
* `efsd'/processed/ * `efsd'/processed/
*********************** ***********************
local extract_years "2001-2012" // just a name for the FINAL extracted file local extract_years "2001-2013" // just a name for the FINAL extracted file
* To save time you can leave out years you have already processed * To save time you can leave out years you have already processed
* just paste the ones you want into the do_years local variable below * just paste the ones you want into the do_years local variable below
* choose any of 2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012 * choose any of 2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012
local do_years = "2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012" // years to process local do_years = "2013" // years to process
* set to 1 to refresh each yearly extract you listed in do_years & append the files * set to 1 to refresh each yearly extract you listed in do_years & append the files
* set to 0 to just append previously extracted files * set to 0 to just append previously extracted files
...@@ -174,7 +174,7 @@ if `do_extracts' { ...@@ -174,7 +174,7 @@ if `do_extracts' {
use "`efsd'/`y'/stata/dvhh.dta", clear use "`efsd'/`y'/stata/dvhh.dta", clear
* 2010 onwards data = mixed/uppercase * 2010 onwards data = mixed/uppercase
if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" { if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
rename *, lower rename *, lower
} }
** sex of HRP ** sex of HRP
...@@ -298,9 +298,10 @@ if `do_extracts' { ...@@ -298,9 +298,10 @@ if `do_extracts' {
* has 1, 2 and more than 2 - so needs recoding. * has 1, 2 and more than 2 - so needs recoding.
recode c_nchild (0=0) (1=1) (2/max=2) recode c_nchild (0=0) (1=1) (2/max=2)
lab var c_nchild "Constraint: number of children" lab var c_nchild "Constraint: number of children"
lab define c_nchild 0 None 1 One 2 "Two or more" lab define c_nchild 0 "None" 1 "One" 2 "Two or more"
lab val c_nchild c_nchild lab val c_nchild c_nchild
di "Year = `y'"
gen ba_year = `y' gen ba_year = `y'
* construct list of vars to keep * construct list of vars to keep
...@@ -345,7 +346,7 @@ if `do_extracts' { ...@@ -345,7 +346,7 @@ if `do_extracts' {
use "`efsd'/`y'/stata/rawhh.dta", clear use "`efsd'/`y'/stata/rawhh.dta", clear
* 2010 onwards data = mixed/uppercase * 2010 onwards data = mixed/uppercase
if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" { if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
rename *, lower rename *, lower
} }
...@@ -404,7 +405,7 @@ if `do_extracts' { ...@@ -404,7 +405,7 @@ if `do_extracts' {
lab var ba_sampyear "Sample year" lab var ba_sampyear "Sample year"
} }
if "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" { if "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
di "* Setting up cal year for `y'" di "* Setting up cal year for `y'"
* sampyear variable removed * sampyear variable removed
* tab survyr sampyear, mi * tab survyr sampyear, mi
...@@ -444,7 +445,7 @@ if `do_extracts' { ...@@ -444,7 +445,7 @@ if `do_extracts' {
use "`efsd'/`y'/stata/rawper.dta", clear use "`efsd'/`y'/stata/rawper.dta", clear
* 2010 data = mixed/uppercase * 2010 data = mixed/uppercase
if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" { if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
rename *, lower rename *, lower
} }
* keep HRPs * keep HRPs
...@@ -462,7 +463,7 @@ if `do_extracts' { ...@@ -462,7 +463,7 @@ if `do_extracts' {
di "* Age error: `y'" di "* Age error: `y'"
drop if dvage18 == 2 drop if dvage18 == 2
} }
else if "`y'" == "2006" | "`y'" == "2007" | "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" { else if "`y'" == "2006" | "`y'" == "2007" | "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
di "Removing HRPs where age < 16 for `y'" di "Removing HRPs where age < 16 for `y'"
drop if dvage_p < 16 drop if dvage_p < 16
} }
...@@ -477,7 +478,7 @@ if `do_extracts' { ...@@ -477,7 +478,7 @@ if `do_extracts' {
di "* year = `y'" di "* year = `y'"
gen c_ethnicd = eth01p gen c_ethnicd = eth01p
} }
else if "`y'" == "2011" | "`y'" == "2012" { else if "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
di "* year = `y'" di "* year = `y'"
gen c_ethnicd = ethep gen c_ethnicd = ethep
replace c_ethnicd = ethwp if c_ethnicd == . replace c_ethnicd = ethwp if c_ethnicd == .
...@@ -519,7 +520,7 @@ if `do_extracts' { ...@@ -519,7 +520,7 @@ if `do_extracts' {
use "`efsd'/`y'/stata/dvper.dta", clear use "`efsd'/`y'/stata/dvper.dta", clear
* 2010 data = mixed/uppercase * 2010 data = mixed/uppercase
if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" { if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
rename *, lower rename *, lower
} }
...@@ -562,7 +563,7 @@ if `do_extracts' { ...@@ -562,7 +563,7 @@ if `do_extracts' {
use "`efsd'/`y'/stata/dvper.dta", clear use "`efsd'/`y'/stata/dvper.dta", clear
* 2010 onwards data = mixed/uppercase * 2010 onwards data = mixed/uppercase
if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2012" { if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2012" | "`y'" == "2013" {
rename *, lower rename *, lower
} }
......
* ---- User configuration: paths, years to process and variables to extract ----
* run without waiting for user
set more off
* set these to what works for you
* place is the base folder; efsd, logd and outd are all built from it
local place = "~/Documents/Work"
* NOTE(review): the folder name is spelt Datatsets here - presumably it matches the folder on disk; confirm before changing
local efsd = "`place'/Data/Social Science Datatsets/Expenditure and Food Survey"
local logd = "`efsd'/log_files"
local outd = "`efsd'/processed"
************************
* NB - the script assumes a certain folder structure for the source EFS/LCFS data like so:
* `efsd'/<year>/stata/<datafile>.dta
* You may have to rename some of the downloaded & unzipped UKDA data folders to make this work
* The script also assumes that this folder exists for the final results:
* `efsd'/processed/
***********************
* extract_years is only used to build the file name of the final combined extract
local extract_years "2001-2013"
* To save time you can leave out years you have already processed
* just paste the ones you want into the do_years local variable below
* choose any of 2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012 2013
* NOTE(review): the final append loop also iterates over do_years, so the combined file
* only contains the years listed here - confirm that this is intended
local do_years = "2013"
* set to 1 to refresh each yearly extract you listed in do_years & append the files
* set to 0 to just append previously extracted files
local do_extracts 1
************************
* Set the variables to be extracted here
* there is error checking below to make sure that they exist in each year or to skip if not
* put case in each keep var if otherwise empty
* -> dvhh file
* basic weighting & consumption expenditure codes
* p60*t p61*t p630*
* the dvhh list is built up in steps; each step appends to what is already there
local dvhh_keepvars = ""
* needed for income equivalisation later
local dvhh_keepvars = "`dvhh_keepvars' incanon a055 g018 g019 p116* p344* p389* p396*"
* DEMAND 2.3 (older people mobile lives)
*local dvhh_keepvars = "`dvhh_keepvars' b480 b481 b485 cc5413t c73312t"
* DEMAND 3.1 (adapting infrastructures)
* a1701 a1711
local dvhh_keepvars = "`dvhh_keepvars' a103 a108 a128 a130 a15* a16*"
* -> dvper file
local dvper_keepvars = "case"
* -> rawhh file
* DEMAND 2.3 (older people mobile lives)
local rawhh_keepvars = "flydes*"
* -> rawper file
local rawper_keepvars = "case"
************************
************************
************************
* set logging
* close any log that is still open; capture hides the error raised when none is open
capture log close
* the log file name carries the run date; replace overwrites a log with the same name
log using "`logd'/ONS-UK-EFS-time-series-extract-$S_DATE.smcl", replace
clear all
* increase default number of variables allowed
set maxvar 10000, perm
* start-up banner: which years will be processed and which variables will be kept
di "*******************************************************"
di "* This script will process all of the EFS files for:"
* FIX: this line used to expand a local called years which is never defined, so it
* printed blank; the years to be processed are held in do_years
di "* `do_years'"
di "* It will keep the following variables:"
di "* dvhh: `dvhh_keepvars' "
di "* rawhh: `rawhh_keepvars'"
di "* This could take some time. "
di "* I suggest you check it is running and then get a cup of tea...."
di "*******************************************************"
* Per-year extraction: the whole block is skipped when do_extracts is 0
if `do_extracts' {
* FIX: report the years held in do_years; the local called years was never defined
di "do_extracts = `do_extracts', all years (`do_years') to be extracted and refreshed"
* y takes each label listed in do_years in turn and is used to build the file paths
foreach y of local do_years {
di "* * * * * * * * * "
di "* -> Processing `y'"
/* census vars/labels
* c_accom_0 c_accom_1 c_accom_2 c_accom_3 c_accom_4
* 0 "Detached" 1 "Semi-detached" 2 "Terraced" 3 "flat/maisonette" 4 "other"
* c_age_0 c_age_1 c_age_2 c_age_3 c_age_4 c_age_5 c_age_6 c_age_7
* 0 "16-24" 1 "25-34" 2 "35-44" 3 "45-54" 4 "55-64" 5 "65-74" 6 "75+"
* c_comp_0 c_comp_1 c_comp_2 c_comp_3
* 0 "married/partnered" 1 "single parent" 2 "single person" 3 "other"
* c_lli_0 c_lli_1
* 0 "No person with lli" 1 "At least 1 person with lli"
* c_ncars_0 c_ncars_1 c_ncars_2
* 0 None 1 One 2 "Two or more"
* c_nchild_0 c_nchild_1 c_nchild_2
* 0 None 1 One 2 "Two or more"
* c_nearners_0 c_nearners_1 c_nearners_2 c_nearners_3
* 0 "0" 1 "1" 2 "2" 3 "3+"
* c_npersons_0 c_npersons_1 c_npersons_2 c_npersons_3 c_npersons_4
* 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+"
* c_nrooms_0 c_nrooms_1 c_nrooms_2 c_nrooms_3
* 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+"
* c_empl_0 c_empl_1 c_empl_2 c_empl_3 c_empl_4
* 0 "NS-SEC 1" 1 "NS-SEC 2" 2 "NS-SEC 3" 3 "Inactive" 4 "Retired"
* c_gender_0 c_gender_1
* 0 "Male" 1 "Female"
* c_tenure_0 c_tenure_1 c_tenure_2 c_tenure_3
* 0 "Owned" 1 "Rent from council" 2 "Social rent" 3 "Private rent incl rent free"
* c_white_0 c_white_1
* 0 "White HRP" 1 "Non-white HRP"
*/
******************************
* dvhh
di "* dv household file"
use "`efsd'/`y'/stata/dvhh.dta", clear
* 2010 onwards data = mixed/uppercase
* FIX: 2013 added - the rest of this section refers to lower-case variable names
if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
    rename *, lower
}
* ---- Household-level constraint variables (c_ prefix) derived from the dvhh file ----
** sex of HRP
* starts at -1 so any sexhrp value other than 1 or 2 stays at -1 (which has no value label)
gen c_gender = -1
replace c_gender = 0 if sexhrp == 1
replace c_gender = 1 if sexhrp == 2
lab var c_gender "Constraint: Gender of HRP"
lab def c_gender 0 "Male" 1 "Female"
lab val c_gender c_gender
** age of HRP
* need to use 75+ as few 80+ after 2001-2
* values up to 15 are set to missing
* NOTE(review): a later comment in this script says p396 only became p396p after 2006 - confirm p396p exists in the earlier years
recode p396p (min/15= . ) (16/24 = 0) (25/34 = 1) (35/44 = 2) (45/54 = 3) (55/64 = 4) (65/74 = 5) (75/max = 6), gen(c_age)
lab var c_age "Constraint: Age of HRP"
* NB for NI need to change these as Census categories are different. Why why why!?
label define c_age 0 "16-24" 1 "25-34" 2 "35-44" 3 "45-54" 4 "55-64" 5 "65-74" 6 "75+"
lab val c_age c_age
** number of rooms
* categories are 1, 2, 3, 4 and 5+ (codes 0 to 4)
recode a114 (1=0) (2=1) (3=2) (4=3) (5/max=4), gen(c_nrooms)
lab var c_nrooms "Constraint: number of rooms"
lab def c_nrooms 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+"
lab val c_nrooms c_nrooms
** Number of residents per household
recode a049 (1=0) (2=1) (3=2) (4=3) (5/max=4), gen(c_npersons)
lab var c_npersons "Constraint: number of persons in household (all ages)"
lab def c_npersons 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+"
lab val c_npersons c_npersons
** Number of earners
recode a054 (0=0) (1=1) (2=2) (3/max=3), gen(c_nearners)
lab var c_nearners "Constraint: number of earners in household"
lab def c_nearners 0 "0" 1 "1" 2 "2" 3 "3+"
lab val c_nearners c_nearners
** Cars and vans.
* Object=
* 0 None
* 1 One
* 2 'Two or more'.
*has 1-12 so need to recode.
* values of a124 above 12 are not covered by the rules and are carried over unchanged
recode a124 (0=0) (1=1) (2/12=2), gen(c_ncars)
lab var c_ncars "Constraint: cars and vans"
lab define c_ncars 0 None 1 One 2 "Two or more"
lab val c_ncars c_ncars
*tab a124 c_cars
** Tenure.
* Object =
* 0 'Owned'
* 1 'Rent from council'
* 2 'Social rent'
* 3 'Private rent' - incl rent-free
*use a121.
* a121 codes 5-7 map to owned, 1 to council, 2 to social rent, and 3, 4 and 8 to private rent
recode a121 (5/7=0) (1=1) (2=2) (3/4 8=3), gen(c_tenure)
lab var c_tenure "Constraint: tenure"
lab define c_tenure 0 "Owned" 1 "Rent from council" 2 "Social rent" 3 "Private rent incl rent free"
lab val c_tenure c_tenure
*tab a121 c_tenure
** employment status.
* Object =
* 0 'NS-SEC 1'
* 1 'NS-SEC 2'
* 2 'NS-SEC 3'
* 3 'Inactive'
* 4 'Retired'.
* need to combine these - a093 = activity, a094 = NS-SEC.
* ref ONS website.
* need to put a094 = 9,10,11 ('Never worked and long term unemployed',students, not stated) into 'inactive'
recode a094 (0/2=0) (3/4=1) (5/8=2) (9/12=3), gen(c_empl)
* the crosstab of a094 against a093 shows that some who are coded as
* retired/unoccupied (a093=4/5) have an NS-SEC code as they are recently
* retired/unoccupied (?). In this case we use the activity code not the NS-SEC code.
* NOTE(review): the comment above cites a093 codes 4/5 but the code overrides on 7 and 6 - confirm which codes are right
replace c_empl=3 if a093==7
replace c_empl=4 if a093==6
lab var c_empl "Constraint: employment status of HRP"
lab define c_empl 0 "NS-SEC 1" 1 "NS-SEC 2" 2 "NS-SEC 3" 3 "Inactive" 4 "Retired"
lab val c_empl c_empl
*tab a093 c_empl
*tab a094 c_empl
** Region.
* use gorx.
gen region = gorx
label define region 1 "North East" 2 "North West & Merseyside" 3 "Yorkshire and the Humber" 4 "East Midlands" 5 "West Midlands" 6 "Eastern" 7 "London" 8 "South East" 9 "South West" 10 "Wales" 11 "Scotland" 12 "Northern Ireland"
lab var region "Govt. Office Region"
lab val region region
** Number of children - 16 or younger.
*Object =
* 0 0
* 1 1
* 2 2+.
* the count is the sum of a040, a041 and a042
gen c_nchild = a040+a041+a042
* could use g019?
* has 1, 2 and more than 2 - so needs recoding.
recode c_nchild (0=0) (1=1) (2/max=2)
lab var c_nchild "Constraint: number of children"
lab define c_nchild 0 None 1 One 2 "Two or more"
lab val c_nchild c_nchild
* NOTE(review): y is substituted as a numeric expression, so a hyphenated label such as
* 2005-2006 is evaluated as a subtraction and stores -1 - confirm what ba_year should hold
gen ba_year = `y'
* Build the list of requested dvhh variables that exist in this year, then trim and save.
* An empty dvhh_keepvars simply gives an empty loop.
local keepvars = ""
foreach pattern of local dvhh_keepvars {
    di "* Testing for existence of `pattern'"
    capture confirm variable `pattern'
    if _rc {
        * not confirmed as written, so try it as a varlist, which forces expansion
        di "* Not found, expanding `pattern'"
        * if nothing matches, the error is shown but captured, and the outer loop moves on
        capture noisily {
            foreach member of varlist `pattern' {
                capture confirm variable `member'
                if !_rc {
                    di "* -> found `member'"
                    local keepvars = "`keepvars' `member'"
                }
            }
        }
    }
    else {
        di "* found `pattern'"
        local keepvars = "`keepvars' `pattern'"
    }
}
di "* dvhh: keeping case* ba_* c_* region weight* `keepvars'"
keep case* ba_* c_* region weight* `keepvars'
qui: compress
* temporary per-year file holding the kept dvhh variables
save "`efsd'/`y'/stata/dvhh-temp.dta", replace
******************************
******************************
* rawhh
di "* raw household file for: c_comp and c_accom"
di "* also to pick up: `rawhh_keepvars'"
use "`efsd'/`y'/stata/rawhh.dta", clear
* 2010 onwards data = mixed/uppercase
* FIX: 2013 added - the rest of this section refers to lower-case variable names
if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
    rename *, lower
}
* accommodation type
* Object=
* 0 Detached
* 1 Semi
* 2 Terrace
* 3 flat/maisonette
* 4 other
* starts at -1 so households matching none of the rules below stay at -1 (no value label)
gen ba_c_accom = -1
replace ba_c_accom = 0 if hsetype == 1
replace ba_c_accom = 1 if hsetype == 2
replace ba_c_accom = 2 if hsetype == 3
* NB the if command (as opposed to the if qualifier) evaluates survyr in the first
* observation only, so this is a per-file test, not a per-household one
if survyr > 2001 {
    * the variable name differs in these files, so copy accom into acomtype
    gen acomtype = accom
}
replace ba_c_accom = 3 if acomtype == 2
replace ba_c_accom = 4 if acomtype == 3
replace ba_c_accom = 4 if acomtype == 4
lab var ba_c_accom "Constraint: accommodation type"
lab define c_accom 0 "Detached" 1 "Semi-detached" 2 "Terraced" 3 "flat/maisonette" 4 "other"
lab val ba_c_accom c_accom
** Composition.
* Object =
* 0 'married/partnered'
* 1 'single parent'
* 2 'single person'
* 3 'other'.
* co-habiting.
* FIX: co-habiting households used to be coded 1, which is labelled single parent;
* they belong in category 0 (married/partnered)
* NOTE(review): a missing numcpart or nummpart also satisfies > 0 - confirm neither can be missing
gen ba_c_comp=0 if numcpart>0
* married.
replace ba_c_comp=0 if nummpart>0
* single parent - assumes a single adult living with 1 or more
* children is a single parent.
replace ba_c_comp=1 if (numadult==1 & numchild>0)
* single person.
replace ba_c_comp=2 if (numadult==1 & numchild==0)
* the rest - this is a cheat!
* anything not classified by the rules above is still missing and becomes 3 (other)
recode ba_c_comp (missing=3)
lab var ba_c_comp "Constraint: household composition"
lab define c_comp 0 "married/partnered" 1 "single parent" 2 "single person" 3 "other"
lab val ba_c_comp c_comp
* cross-tabs to check the composition coding in the log
tab ba_c_comp numcpart
tab ba_c_comp nummpart
* Sample year: taken from sampyear up to 2007 and from survyr from 2008 onwards
if "`y'" == "2001-2002" | "`y'" == "2002-2003" | "`y'" == "2003-2004" | "`y'" == "2004-2005" | "`y'" == "2005-2006" | "`y'" == "2006" | "`y'" == "2007" {
    di "* Setting up cal year for `y'"
    * tab survyr sampyear, mi
    gen ba_sampyear = sampyear
    lab var ba_sampyear "Sample year"
}
* FIX: 2013 added - without it ba_sampyear is never created for 2013 and the later
* steps that use it (birth year, year-quarter and year-month strings) fail
if "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
    di "* Setting up cal year for `y'"
    * sampyear variable removed
    * tab survyr sampyear, mi
    gen ba_sampyear = survyr
    lab var ba_sampyear "Sample year"
}
* NOTE(review): only 2012 gets the flight-variable prefix fix - confirm whether 2013 needs it too
if "`y'" == "2012" {
    di "* fixing issue with multiple flights varnames differently for `y'"
    renpfix flydes_ flydest1
}
* construct list of vars to keep
* if rawhh_keepvars is empty STATA will skip
* FIX: looping with foreach of varlist stops the whole do-file as soon as one entry
* matches nothing, so the warning below could never be shown. Each entry is now
* expanded with unab under capture, and an entry with no match is reported and skipped.
local keepvars = ""
foreach v of local rawhh_keepvars {
    capture unab matched : `v'
    if !_rc {
        local keepvars = "`keepvars' `matched'"
    }
    else {
        di in red "`v' does not exist in `y' - will be missing"
    }
}
di "* rawhh: keeping case* ba_* c_* `keepvars'"
* the constraint variables were built with a ba_c_ prefix; rename them to plain c_
renpfix ba_c_ c_
keep case* ba_* c_* `keepvars'
qui: compress
save "`efsd'/`y'/stata/rawhh-temp.dta", replace
******************************
******************************
* rawper
di "* Need rawper file for ethnicity detail"
use "`efsd'/`y'/stata/rawper.dta", clear
* 2010 onwards data = mixed/uppercase
* FIX: 2013 added - the rest of this section refers to lower-case variable names
if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
    rename *, lower
}
* keep HRPs
tab hrpid, nol
keep if hrpid == 1
* list any household that still has more than one HRP row
duplicates tag case, gen(tag)
di "* -> Any duplicates in `y' ?"
li case person hrpid sex dvage* if tag == 1
* Drop under-age HRPs; the age variable differs between years
if "`y'" == "2005-2006" | "`y'" == "2006-2007" {
    di "* Age error: `y'"
    drop if dvage18 == 2
}
* FIX: 2013 added so that it uses dvage_p like the other years from 2006 onwards,
* instead of falling through to the dvage branch
else if "`y'" == "2006" | "`y'" == "2007" | "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
    di "Removing HRPs where age < 16 for `y'"
    drop if dvage_p < 16
}
else {
    di "Removing HRPs where age < 16 for `y'"
    drop if dvage < 16
}
* Detailed ethnic group of the HRP; the source variable differs between years
di "* ethnic detail"
if "`y'" == "2008" | "`y'" == "2009" | "`y'" == "2010" {
    di "* year = `y'"
    gen c_ethnicd = eth01p
}
* FIX: 2013 added so that it uses the same four source variables as 2011 and 2012,
* instead of falling through to the pre-2008 variable
else if "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
    di "* year = `y'"
    * take the first non-missing value of ethep, ethwp, ethsp and ethnip, in that order
    gen c_ethnicd = ethep
    replace c_ethnicd = ethwp if c_ethnicd == .
    replace c_ethnicd = ethsp if c_ethnicd == .
    replace c_ethnicd = ethnip if c_ethnicd == .
}
else {
    di "* year = `y'"
    * before 2008
    gen c_ethnicd = ethnic_p
}
lab var c_ethnicd "Detailed ethnic group"
lab def c_ethnicd 0 "Missing/inapplicable" 1 "White" 2 "Mixed" 3 "Asian" 4 "Black" 5 "Other"
lab val c_ethnicd c_ethnicd
* construct list of vars to keep
* if rawper_keepvars is empty STATA will skip
* FIX: looping with foreach of varlist stops the whole do-file as soon as one entry
* matches nothing, so the warning below could never be shown. Each entry is now
* expanded with unab under capture, and an entry with no match is reported and skipped.
local keepvars ""
foreach v of local rawper_keepvars {
    capture unab matched : `v'
    if !_rc {
        local keepvars = "`keepvars' `matched'"
    }
    else {
        di in red "`v' does not exist in `y' - will be missing"
    }
}
* keep rawper variables
keep case* c_* `keepvars'
save "`efsd'/`y'/stata/rawper-temp.dta", replace
******************************
******************************
* dvper
di "* Need dvper file to count n children aged under 14 (for OECD equivalisation weight)"
use "`efsd'/`y'/stata/dvper.dta", clear
* 2010 onwards data = mixed/uppercase
* FIX: 2013 added - the rest of this section refers to lower-case variable names
if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
    rename *, lower
}
* keep only hrps
keep if a003 == 1
* Ethnicity
* NB a012 and a013 changed categories 2007->2008 to 0 = n/a, 1 = white, 2 = everyone else
* this still works, you can get detail from rawper.dta
* NOTE(review): the recode leaves a value of 0 unchanged, so an n/a code of 0 would be
* labelled White HRP rather than showing up as missing - confirm
recode a012p (1=0) (2/max=1), gen(c_white)
lab var c_white "Constraint: non-white HRP"
lab def c_white 0 "White HRP" 1 "Non-white HRP"
lab val c_white c_white
* if HRPs are not classified - they will show up as missing
* construct list of vars to keep
* if dvper_keepvars is empty STATA will skip
* FIX 1: looping with foreach of varlist stops the whole do-file as soon as one entry
* matches nothing; each entry is now expanded with unab under capture instead.
* FIX 2: the list built here was never used - the keep below now includes it.
local keepvars = ""
foreach v of local dvper_keepvars {
    capture unab matched : `v'
    if !_rc {
        local keepvars = "`keepvars' `matched'"
    }
    else {
        di in red "`v' does not exist in `y' - will be missing"
    }
}
keep case* c_white `keepvars'
qui: compress
save "`efsd'/`y'/stata/dvper-temp.dta", replace
******************************
************
* Now c_lli but this time need to collapse it so we count the number in the household with/out lli
* and count the number of children of various ages
use "`efsd'/`y'/stata/dvper.dta", clear
* 2010 onwards data = mixed/uppercase
* FIX: the test listed 2012 twice and left out 2013
if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2013" {
    rename *, lower
}
* assume if someone is eligible for incapacity benefit then = lli
* even if not actually receiving
* in rawper dvincap = a + b
* in dvper a227 (1 = rec) = a a227 (2=not rec) = b
* ought to check prevalence with FRS
* NOTE(review): a missing a227 also satisfies > 0 and would be counted as lli - confirm
gen c_lli_sum = 0
replace c_lli_sum = 1 if a227 > 0
**********
* need to count children of ages < 14 for OECD equivalence scale
* can then calculate n over 14 by subtracting from (a040 + a041 + a042)
gen ba_under14 = 0
replace ba_under14 = 1 if a005p < 14
* collapse to count the incidence of lli & count the number of children in each age group
* case is the household id
collapse (sum) c_lli_sum ba_*, by(case)
* turn the household count into a 0/1 flag
gen c_lli = 0
replace c_lli = 1 if c_lli_sum > 0
drop c_lli_sum
lab var c_lli "Constraint: presence of LLI"
lab def c_lli 0 "No person with lli" 1 "At least 1 person with lli"
* FIX: the value label was defined but never attached to the variable
lab val c_lli c_lli
lab var ba_under14 "Number of children aged under 14"
* keep only new variables as we've collapsed to hh
keep case* ba_* c_*
qui: compress
save "`efsd'/`y'/stata/dvper-lli.dta", replace
* Join the per-year temporary files on the household id (case).
* The data in memory at this point is the collapsed lli file saved just above.
di "*** MERGE ALL FILES for year = `y'"
local tmpdir "`efsd'/`y'/stata"
qui: merge case using "`tmpdir'/dvhh-temp.dta" ///
    "`tmpdir'/rawper-temp.dta" ///
    "`tmpdir'/dvper-temp.dta" ///
    "`tmpdir'/dvper-lli.dta" ///
    "`tmpdir'/rawhh-temp.dta" , sort
* summarise the merge indicators so the match rates appear in the log
su _merge*
* the temporary files are no longer needed once merged
foreach tmpfile in dvhh-temp rawper-temp dvper-temp dvper-lli rawhh-temp {
    erase "`tmpdir'/`tmpfile'.dta"
}
* survey_year keeps the year label as a string
gen survey_year = "`y'"
tab survey_year
aorder
* nothing is dropped here: some variables only exist in some years
***********************************************
* OECD equivalisation weight, used to equivalise income or expenditure
* (see e.g. DWP HBAI reports)
* children aged 14+ = g019 minus the number aged under 14
gen ba_over14 = 0
replace ba_over14 = g019 - ba_under14
* households with no adults get a missing adult count and so a missing weight
* (how many are there?)
gen ba_adults = g018 if g018 > 0
* Weights applied below:
*   1st adult             = .67
*   spouse                = .33
*   other 2nd adult       = .33
*   3rd adult             = .33
*   subsequent adults     = .33
*   children aged < 14    = .2
*   children aged 14+     = .33
* households with no children: set missing child counts to zero
replace ba_under14 = 0 if ba_under14 == .
replace ba_over14 = 0 if ba_over14 == .
gen oecd_equivbhcwt = 0.67 if ba_adults >= 1
replace oecd_equivbhcwt = oecd_equivbhcwt ///
    + ((ba_adults-1) * 0.33) ///
    + (ba_under14 * 0.2) ///
    + (ba_over14 * 0.33)
di "*-> Calculating equiv income (OECD) and quartiles/deciles"
* p344, p389 & p396 changed to *p after 2006 and top coded (!)
* y is substituted as a numeric expression here, so a hyphenated label such as
* 2005-2006 is evaluated as a subtraction and fails this test
if `y' > 2005 {
    foreach stub in p344 p389 p396 {
        rename `stub'p `stub'
    }
}
* equivalised incomes: gross, net before housing costs, net after housing costs
gen equiv_p344 = p344/oecd_equivbhcwt
gen equiv_p389bhc = p389/oecd_equivbhcwt
gen equiv_p389ahc = (p389-p116t)/oecd_equivbhcwt
lab var equiv_p344 "Equivalised normal gross household income (OECD)"
lab var equiv_p389bhc "Equivalised normal disposable (net) household income (BHC, OECD)"
lab var equiv_p389ahc "Equivalised normal disposable (net) household income (AHC, OECD)"
* deciles and quartiles of each income measure; the label text is looked up by name
local lbl_incanon "anonymised hhold inc + allowances"
local lbl_p344 "gross normal weekly household income"
local lbl_p389 "normal weekly disposable hhld income"
foreach inc in incanon p344 p389 {
    egen `inc'_dec = cut(`inc'), group(10)
    lab var `inc'_dec "Deciles: `lbl_`inc''"
    egen `inc'_quart = cut(`inc'), group(4)
    lab var `inc'_quart "Quartiles: `lbl_`inc''"
}
* quarter labels changed in 2006
* old:
*   1 april to june, 2 june to september, 3 october to december, 4 january to march
* new (a099):
*   1 january to march, 2 april to june, 3 july to september, 4 october to december
* so the quarter is rebuilt from the month (a055) further down instead
gen ba_month = a055
* year of birth of the HRP
* remember that after 2005 age is top coded to 80
gen ba_birthyear = ba_sampyear - p396
* ten-year birth cohorts
recode ba_birthyear ///
    (1900/1909=1 "1900-1909") (1910/1919=2 "1910-1919") (1920/1929=3 "1920-1929") ///
    (1930/1939=4 "1930-1939") (1940/1949=5 "1940-1949") (1950/1959=6 "1950-1959") ///
    (1960/1969=7 "1960-1969") (1970/1979=8 "1970-1979") (1980/1989=9 "1980-1989") ///
    (1990/1999=10 "1990-1999") (2000/2009=11 "2000-2009"), gen(ba_birth_cohort)
* tab ba_birth_cohort c_age, mi
***********************
* calendar quarter from month; -1 is left where the month is not 1 to 12
gen ba_quarter = -1
replace ba_quarter = 1 if inlist(ba_month, 1, 2, 3)
replace ba_quarter = 2 if inlist(ba_month, 4, 5, 6)
replace ba_quarter = 3 if inlist(ba_month, 7, 8, 9)
replace ba_quarter = 4 if inlist(ba_month, 10, 11, 12)
tab ba_quarter
* string keys combining year with quarter and with month
egen ba_sampyear_quarter = concat(ba_sampyear ba_quarter), punct("_Q")
lab var ba_sampyear_quarter "EFS/FES calendar year & quarter"
egen ba_sampyear_month = concat(ba_sampyear ba_month), punct("_")
lab var ba_sampyear_month "EFS/FES calendar year & month"
gen survey_name = "efs"
* UK country from region: regions 1 to 9 are England, then Wales, Scotland, Northern Ireland
gen uk_country = 1 if region > 0 & region < 10
replace uk_country = 2 if region == 10
replace uk_country = 3 if region == 11
replace uk_country = 4 if region == 12
lab def uk_country 1 "England" 2 "Wales" 3 "Scotland" 4 "Northern Ireland"
lab val uk_country uk_country
qui: compress
* one extract file per year, written to the output folder
save "`outd'/EFS-`y'-extract-BA.dta", replace
* end of per-year processing: close the year loop, then the do_extracts block
}
}
****************************
* Stack the yearly extracts into one file and save the combined extract
* NOTE(review): only the years listed in do_years are appended, although the output
* file name is built from extract_years - confirm that this is intended
clear // start with nothing
foreach yr of local do_years {
    di "Appending `yr'"
    qui: append using "`outd'/EFS-`yr'-extract-BA.dta", force
    *erase "`efsd'/`yr'/FES-`yr'-extract-BA.dta"
}
* quick summary of every constraint variable
tabstat c_*, c(s) s(mean min max)
lab var survey_year "EFS/FES year"
* finally check for duplicate months 2005-6 -> 2006 samples
tab ba_sampyear ba_month
tab survey_year ba_month
* Jan/Feb/Mar 2006 are duplicates of Jan/Feb/Mar 2005-6
forvalues m = 1/3 {
    drop if survey_year == "2006" & ba_month == `m'
}
* check
tab ba_sampyear ba_month
gen caseno = case
* to test
tabstat c_ncars c_nchild c_npersons c_nearners c_nrooms, by(ba_sampyear)
* remove merge indicators and helper variables before saving
drop _merge* a055 g* p116t caseno
di "*-> Compressing"
qui: compress
* alphabetical order, then household id and survey variables first
aorder
order case* survey*
save "`outd'/EFS-`extract_years'-extract-BA.dta", replace
* Closing banner: say whether the yearly extracts were refreshed, then close the log
di "************************************************************************"
if `do_extracts' {
    * FIX: report the years held in do_years; the local called years was never defined
    di "*-> do_extracts = `do_extracts', all years (`do_years') extracted and refreshed"
}
* FIX: the opening brace must be on the same line as else; on a line of its own it is a syntax error
else {
    di "*-> do_extracts = `do_extracts', years not extracted so individual files not refreshed"
}
di "*-> Job ended at $S_DATE"
log close
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment