diff --git a/ONS-UK-EFS-LCFS-time-series-extract.do b/ONS-UK-EFS-LCFS-time-series-extract.do index 0ef7688555d3b97b27c63f35b531d183e96487c5..70025b8560c6b7a4f335cf7e30895c48dd7d276f 100755 --- a/ONS-UK-EFS-LCFS-time-series-extract.do +++ b/ONS-UK-EFS-LCFS-time-series-extract.do @@ -25,7 +25,7 @@ * so a mechanism has been included to allow the extraction of bespoke variables sets once this script has set all variable names to lower case * * For all subsequent changes see: -* -> https://github.com/dataknut/LCFS/blob/master/ONS-UK-EFS-time-series-extract.do +* -> https://github.com/dataknut/LCFS/blob/master/ONS-UK-EFS-LCFS-time-series-extract.do * Copyright (c) 2002-2012 University of Essex, (c) 2012-2015 University of Southampton * @@ -49,8 +49,10 @@ set more off * set these to what works for you -local place = "/Users/ben/Documents/Work" local efsd = "`place'/Data/Social Science Datatsets/Expenditure and Food Survey" -local logd = "`efsd'/log_files" local outd = "`efsd'/processed" +local place = "~/Documents/Work" +local efsd = "`place'/Data/Social Science Datatsets/Expenditure and Food Survey" +local logd = "`efsd'/log_files" +local outd = "`efsd'/processed" ************************ * NB - the script assumes a certain folder structure for the source EFS/LCFS data like so: @@ -64,7 +66,7 @@ local extract_years "2001-2012" // just a name for the FINAL extracted file * To save time you can leave out years you have already processed * just paste the ones you want into the do_years local variable below -* choose any of 2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012 +* choose any of 2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012 local do_years = "2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006 2007 2008 2009 2010 2011 2012" // years to process * set to 1 to refresh each yearly extract you listed in do_years & append the files @@ -83,9 +85,10 @@ local dvhh_keepvars = "" * needed for income equivalisation later local dvhh_keepvars = "`dvhh_keepvars' incanon a055 g018 g019 p116* p344* p389* p396*" * DEMAND 2.3 (older people mobile lives) -local dvhh_keepvars = "`dvhh_keepvars' b480 b481 b485 cc5413t c73312t" // overseas travel expenditures +*local dvhh_keepvars = "`dvhh_keepvars' b480 b481 b485 cc5413t c73312t" // overseas travel expenditures * DEMAND 3.1 (adapting infrastructures) -local dvhh_keepvars = "`dvhh_keepvars' a103 a108 a128 a130 a15* a16* a1701 a1711" // energy using appliances +* a1701 a1711 +local dvhh_keepvars = "`dvhh_keepvars' a103 a108 a128 a130 a15* a16*" // energy using appliances * -> dvper file local dvper_keepvars = "case" @@ -102,7 +105,7 @@ local rawper_keepvars = "case" * set logging capture log close log using "`logd'/ONS-UK-EFS-time-series-extract-$S_DATE.smcl", replace - clear all +clear all * increase default number of variables allowed set maxvar 10000, perm @@ -115,7 +118,7 @@ di "* dvhh: `dvhh_keepvars' " di "* rawhh: `rawhh_keepvars'" di "* This could take some time. " di "* I suggest you check it is running and then get a cup of tea...." -di "*******************************************************" +di "*******************************************************" if `do_extracts' { di "do_extracts = `do_extracts', all years (`years') to be extracted and refreshed" @@ -126,44 +129,178 @@ if `do_extracts' { * c_accom_0 c_accom_1 c_accom_2 c_accom_3 c_accom_4 * 0 "Detached" 1 "Semi-detached" 2 "Terraced" 3 "flat/maisonette" 4 "other" - * c_age_0 c_age_1 c_age_2 c_age_3 c_age_4 c_age_5 c_age_6 c_age_7 + + * c_age_0 c_age_1 c_age_2 c_age_3 c_age_4 c_age_5 c_age_6 c_age_7 * 0 "16-24" 1 "25-34" 2 "35-44" 3 "45-54" 4 "55-64" 5 "65-74" 6 "75+" - * c_comp_0 c_comp_1 c_comp_2 c_comp_3 * 0 "married/partnered" 1 "single parent" 2 "single person" 3 "other" - * c_lli_0 c_lli_1 * 0 "No person with lli" 1 "At least 1 person with lli" + * c_comp_0 c_comp_1 c_comp_2 c_comp_3 + * 0 "married/partnered" 1 "single parent" 2 "single person" 3 "other" + + * c_lli_0 c_lli_1 + * 0 "No person with lli" 1 "At least 1 person with lli" * c_ncars_0 c_ncars_1 c_ncars_2 - * 0 None 1 One 2 "Two or more" + * 0 None 1 One 2 "Two or more" - * c_nchild_0 c_nchild_1 c_nchild_2 * 0 None 1 One 2 "Two or more" - * c_nearners_0 c_nearners_1 c_nearners_2 c_nearners_3 + * c_nchild_0 c_nchild_1 c_nchild_2 + * 0 None 1 One 2 "Two or more" + + * c_nearners_0 c_nearners_1 c_nearners_2 c_nearners_3 * 0 "0" 1 "1" 2 "2" 3 "3+" - * c_npersons_0 c_npersons_1 c_npersons_2 c_npersons_3 c_npersons_4 * 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+" + + * c_npersons_0 c_npersons_1 c_npersons_2 c_npersons_3 c_npersons_4 + * 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+" * c_nrooms_0 c_nrooms_1 c_nrooms_2 c_nrooms_3 * 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+" - * c_empl_0 c_empl_1 c_empl_2 c_empl_3 c_empl_4 * 0 "NS-SEC 1" 1 "NS-SEC 2" 2 "NS-SEC 3" 3 "Inactive" 4 "Retired" + * c_empl_0 c_empl_1 c_empl_2 c_empl_3 c_empl_4 + * 0 "NS-SEC 1" 1 "NS-SEC 2" 2 "NS-SEC 3" 3 "Inactive" 4 "Retired" * c_gender_0 c_gender_1 * 0 "Male" 1 "Female" - * c_tenure_0 c_tenure_1 c_tenure_2 c_tenure_3 * 0 "Owned" 1 "Rent from council" 2 "Social rent" 3 "Private rent incl rent free" + + * c_tenure_0 c_tenure_1 c_tenure_2 c_tenure_3 + * 0 "Owned" 1 "Rent from council" 2 "Social rent" 3 "Private rent incl rent free" - * c_white_0 c_white_1 * 0 "White HRP" 1 "Non-white HRP" - */ ****************************** - * dvhh di "* dv household file" use "`efsd'/`y'/stata/dvhh.dta", clear + * c_white_0 c_white_1 + * 0 "White HRP" 1 "Non-white HRP" + + */ + + ****************************** + * dvhh + di "* dv household file" + use "`efsd'/`y'/stata/dvhh.dta", clear + * 2010 onwards data = mixed/uppercase if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" { rename *, lower - } ** sex of HRP gen c_gender = -1 replace c_gender = 0 if sexhrp == 1 replace c_gender = 1 if sexhrp == 2 lab var c_gender "Constraint: Gender of HRP" lab def c_gender 0 "Male" 1 "Female" lab val c_gender c_gender ** age of HRP - * need to use 75+ as few 80+ after 2001-2 recode p396p (min/15= . ) (16/24 = 0) (25/34 = 1) (35/44 = 2) (45/54 = 3) /// (55/64 = 4) (65/74 = 5) (75/max = 6), gen(c_age) lab var c_age "Constraint: Age of HRP" - * NB for NI need to change these as Census categories are different. Why why why!? label define c_age /// 0 "16-24" /// 1 "25-34" /// 2 "35-44" /// 3 "45-54" /// 4 "55-64" /// 5 "65-74" /// 6 "75+" lab val c_age c_age - ** number of rooms *1,2,3,4+ recode a114 (1=0) (2=1) (3=2) (4=3) (5/max=4), gen(c_nrooms) lab var c_nrooms "Constraint: number of rooms" lab def c_nrooms 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+" lab val c_nrooms c_nrooms ** Number of residents per household recode a049 (1=0) (2=1) (3=2) (4=3) (5/max=4), gen(c_npersons) lab var c_npersons "Constraint: number of persons in household (all ages)" lab def c_npersons 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+" lab val c_npersons c_npersons ** Number of earners recode a054 (0=0) (1=1) (2=2) (3/max=3), gen(c_nearners) lab var c_nearners "Constraint: number of earners in household" lab def c_nearners 0 "0" 1 "1" 2 "2" 3 "3+" lab val c_nearners c_nearners ** Cars and vans. * Object= * 0 None * 1 One * 2 'Two or more'. *has 1-12 so need to recode. recode a124 (0=0) (1=1) (2/12=2), gen(c_ncars) lab var c_ncars "Constraint: cars and vans" lab define c_ncars 0 None 1 One 2 "Two or more" lab val c_ncars c_ncars *tab a124 c_cars ** Tenure. * Object = * 0 'Owned' * 1 'Rent from council' * 2 'Social rent' * 3 'Private rent' - incl rent-free *use a121. recode a121 (5/7=0) (1=1) (2=2) (3/4 8=3), gen(c_tenure) lab var c_tenure "Constraint: tenure" lab define c_tenure 0 "Owned" 1 "Rent from council" 2 "Social rent" 3 "Private rent incl rent free" lab val c_tenure c_tenure *tab a121 c_tenure ** employment status. * Object = * 0 'NS-SEC 1' * 1 'NS-SEC 2' * 2 'NS-SEC 3' * 3 'Inactive' * 4 'Retired'. * need to combine these - a093 = activity, a094 = NS-SEC. * ref ONS website. * need to put a094 = 9,10,11 ('Never worked and long term unemployed',students, not stated) into 'inactive' recode a094 (0/2=0) (3/4=1) (5/8=2) (9/12=3), gen(c_empl) * the crosstab of a094 against a093 shows that some who are coded as * retired/unoccupied (a093=4/5) have an NS-SEC code as they are recently * retired/unoccupied (?). In this case we use the activity code not the NS-SEC code. replace c_empl=3 if a093==7 replace c_empl=4 if a093==6 lab var c_empl "Constraint: employment status of HRP" lab define c_empl 0 "NS-SEC 1" 1 "NS-SEC 2" 2 "NS-SEC 3" 3 "Inactive" 4 "Retired" lab val c_empl c_empl *tab a093 c_empl *tab a094 c_empl - ** Region. * use gorx. gen region = gorx label define region 1 "North East" 2 "North West & Merseyside" 3 "Yorkshire and the Humber" /// 4 "East Midlands" 5 "West Midlands" 6 "Eastern" 7 "London" 8 "South East" /// 9 "South West" 10 "Wales" 11 "Scotland" 12 "Northern Ireland" lab var region "Govt. Office Region" lab val region region ** Number of children - 16 or younger. *Object = * 0 0 * 1 1 * 2 2+. gen c_nchild = a040+a041+a042 + } + ** sex of HRP + gen c_gender = -1 + replace c_gender = 0 if sexhrp == 1 + replace c_gender = 1 if sexhrp == 2 + lab var c_gender "Constraint: Gender of HRP" + lab def c_gender 0 "Male" 1 "Female" + lab val c_gender c_gender + + ** age of HRP + * need to use 75+ as few 80+ after 2001-2 + recode p396p (min/15= . ) (16/24 = 0) (25/34 = 1) (35/44 = 2) (45/54 = 3) /// + (55/64 = 4) (65/74 = 5) (75/max = 6), gen(c_age) + lab var c_age "Constraint: Age of HRP" + * NB for NI need to change these as Census categories are different. Why why why!? + label define c_age /// + 0 "16-24" /// + 1 "25-34" /// + 2 "35-44" /// + 3 "45-54" /// + 4 "55-64" /// + 5 "65-74" /// + 6 "75+" + + lab val c_age c_age + + ** number of rooms + *1,2,3,4+ + recode a114 (1=0) (2=1) (3=2) (4=3) (5/max=4), gen(c_nrooms) + lab var c_nrooms "Constraint: number of rooms" + lab def c_nrooms 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+" + lab val c_nrooms c_nrooms + + ** Number of residents per household + recode a049 (1=0) (2=1) (3=2) (4=3) (5/max=4), gen(c_npersons) + lab var c_npersons "Constraint: number of persons in household (all ages)" + lab def c_npersons 0 "1" 1 "2" 2 "3" 3 "4" 4 "5+" + lab val c_npersons c_npersons + + ** Number of earners + recode a054 (0=0) (1=1) (2=2) (3/max=3), gen(c_nearners) + lab var c_nearners "Constraint: number of earners in household" + lab def c_nearners 0 "0" 1 "1" 2 "2" 3 "3+" + lab val c_nearners c_nearners + + ** Cars and vans. + * Object= + * 0 None + * 1 One + * 2 'Two or more'. + + *has 1-12 so need to recode. + + recode a124 (0=0) (1=1) (2/12=2), gen(c_ncars) + lab var c_ncars "Constraint: cars and vans" + lab define c_ncars 0 None 1 One 2 "Two or more" + lab val c_ncars c_ncars + *tab a124 c_cars + + ** Tenure. + * Object = + * 0 'Owned' + * 1 'Rent from council' + * 2 'Social rent' + * 3 'Private rent' - incl rent-free + + *use a121. + recode a121 (5/7=0) (1=1) (2=2) (3/4 8=3), gen(c_tenure) + lab var c_tenure "Constraint: tenure" + lab define c_tenure 0 "Owned" 1 "Rent from council" 2 "Social rent" 3 "Private rent incl rent free" + lab val c_tenure c_tenure + *tab a121 c_tenure + + ** employment status. + * Object = + * 0 'NS-SEC 1' + * 1 'NS-SEC 2' + * 2 'NS-SEC 3' + * 3 'Inactive' + * 4 'Retired'. + + * need to combine these - a093 = activity, a094 = NS-SEC. + * ref ONS website. + + * need to put a094 = 9,10,11 ('Never worked and long term unemployed',students, not stated) into 'inactive' + recode a094 (0/2=0) (3/4=1) (5/8=2) (9/12=3), gen(c_empl) + * the crosstab of a094 against a093 shows that some who are coded as + * retired/unoccupied (a093=4/5) have an NS-SEC code as they are recently + * retired/unoccupied (?). In this case we use the activity code not the NS-SEC code. + + replace c_empl=3 if a093==7 + replace c_empl=4 if a093==6 + + lab var c_empl "Constraint: employment status of HRP" + lab define c_empl 0 "NS-SEC 1" 1 "NS-SEC 2" 2 "NS-SEC 3" 3 "Inactive" 4 "Retired" + lab val c_empl c_empl + *tab a093 c_empl + *tab a094 c_empl + + ** Region. + * use gorx. + + gen region = gorx + label define region 1 "North East" 2 "North West & Merseyside" 3 "Yorkshire and the Humber" /// + 4 "East Midlands" 5 "West Midlands" 6 "Eastern" 7 "London" 8 "South East" /// + 9 "South West" 10 "Wales" 11 "Scotland" 12 "Northern Ireland" + lab var region "Govt. Office Region" + lab val region region + + + ** Number of children - 16 or younger. + *Object = + * 0 0 + * 1 1 + * 2 2+. + gen c_nchild = a040+a041+a042 * could use g019? - * has 1, 2 and more than 2 - so needs recoding. recode c_nchild (0=0) (1=1) (2/max=2) lab var c_nchild "Constraint: number of children" lab define c_nchild 0 None 1 One 2 "Two or more" lab val c_nchild c_nchild + * has 1, 2 and more than 2 - so needs recoding. + recode c_nchild (0=0) (1=1) (2/max=2) + lab var c_nchild "Constraint: number of children" + lab define c_nchild 0 None 1 One 2 "Two or more" + lab val c_nchild c_nchild + gen ba_year = `y' * construct list of vars to keep @@ -181,7 +318,7 @@ if `do_extracts' { * try as a varlist (forces expansion) di "* Not found, expanding `v'" capture noisily { - * if variable really doesn't exist this throws an exception moves on + * if variable really doesn't exist this throws an exception & moves on foreach vt of varlist `v' { capture confirm variable `vt' if !_rc { @@ -195,18 +332,71 @@ if `do_extracts' { di "* dvhh: keeping case* ba_* c_* region weight* `keepvars'" keep case* ba_* c_* region weight* `keepvars' - qui: compress * save kept dvhh vars save "`efsd'/`y'/stata/dvhh-temp.dta", replace ****************************** - ****************************** - * rawhh di "* raw household file for: c_comp and c_accom" - di "* also to pick up: `rawhh_keepvars'" use "`efsd'/`y'/stata/rawhh.dta", clear + qui: compress + * save kept dvhh vars + save "`efsd'/`y'/stata/dvhh-temp.dta", replace + ****************************** + + ****************************** + * rawhh + di "* raw household file for: c_comp and c_accom" + di "* also to pick up: `rawhh_keepvars'" + + use "`efsd'/`y'/stata/rawhh.dta", clear + * 2010 onwards data = mixed/uppercase if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" { rename *, lower - } * accomodation type * Object= * 0 Detached * 1 Semi * 2 Terrace * 3 flat/maisontte * 4 other gen ba_c_accom = -1 replace ba_c_accom = 0 if hsetype == 1 replace ba_c_accom = 1 if hsetype == 2 replace ba_c_accom = 2 if hsetype == 3 + } + + * accomodation type + * Object= + * 0 Detached + * 1 Semi + * 2 Terrace + * 3 flat/maisontte + * 4 other + gen ba_c_accom = -1 + replace ba_c_accom = 0 if hsetype == 1 + replace ba_c_accom = 1 if hsetype == 2 + replace ba_c_accom = 2 if hsetype == 3 if survyr > 2001 { * grr, why can't var names stay the same?! gen acomtype = accom - } replace ba_c_accom = 3 if acomtype == 2 replace ba_c_accom = 4 if acomtype == 3 replace ba_c_accom = 4 if acomtype == 4 lab var ba_c_accom "Constraint: accommodation type" lab define c_accom 0 "Detached" 1 "Semi-detached" 2 "Terraced" 3 "flat/maisonette" 4 "other" lab val ba_c_accom c_accom ** Composition. * Object = * 0 'married/partnered' * 1 'single parent' * 2 'single person' * 3 'other'. * co-habiting. gen ba_c_comp=1 if numcpart>0 * married. replace ba_c_comp=0 if nummpart>0 * single parent - assumes a single adult living with 1 or more * children is a single parent. replace ba_c_comp=1 if (numadult==1 & numchild>0) * single person. replace ba_c_comp=2 if (numadult==1 & numchild==0) * the rest - this is a cheat! recode ba_c_comp (missing=3) lab var ba_c_comp "Constraint: household composition" lab define c_comp 0 "married/partnered" 1 "single parent" 2 "single person" 3 "other" lab val ba_c_comp c_comp tab ba_c_comp numcpart tab ba_c_comp nummpart + } + replace ba_c_accom = 3 if acomtype == 2 + replace ba_c_accom = 4 if acomtype == 3 + replace ba_c_accom = 4 if acomtype == 4 + + lab var ba_c_accom "Constraint: accommodation type" + lab define c_accom 0 "Detached" 1 "Semi-detached" 2 "Terraced" 3 "flat/maisonette" 4 "other" + lab val ba_c_accom c_accom + + ** Composition. + * Object = + * 0 'married/partnered' + * 1 'single parent' + * 2 'single person' + * 3 'other'. + + * co-habiting. + gen ba_c_comp=1 if numcpart>0 + * married. + replace ba_c_comp=0 if nummpart>0 + * single parent - assumes a single adult living with 1 or more + * children is a single parent. + replace ba_c_comp=1 if (numadult==1 & numchild>0) + * single person. + replace ba_c_comp=2 if (numadult==1 & numchild==0) + * the rest - this is a cheat! + recode ba_c_comp (missing=3) + lab var ba_c_comp "Constraint: household composition" + lab define c_comp 0 "married/partnered" 1 "single parent" 2 "single person" 3 "other" + lab val ba_c_comp c_comp + tab ba_c_comp numcpart + tab ba_c_comp nummpart + + if "`y'" == "2001-2002" | "`y'" == "2002-2003" | "`y'" == "2003-2004" | "`y'" == "2004-2005" | "`y'" == "2005-2006" | "`y'" == "2006" | "`y'" == "2007" { di "* Setting up cal year for `y'" * tab survyr sampyear, mi @@ -245,16 +435,18 @@ if `do_extracts' { qui: compress - save "`efsd'/`y'/stata/rawhh-temp.dta", replace ****************************** - ****************************** + save "`efsd'/`y'/stata/rawhh-temp.dta", replace + ****************************** + + ****************************** * rawper di "* Need rawper file for ethnicity detail" - use "`efsd'/`y'/stata/rawper.dta", clear + use "`efsd'/`y'/stata/rawper.dta", clear * 2010 data = mixed/uppercase if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" { rename *, lower - } + } * keep HRPs tab hrpid, nol @@ -321,17 +513,27 @@ if `do_extracts' { ****************************** ****************************** - * dvper di "* Need dvper file to count n children aged under 14 (for OECD equivalisation weight)" + * dvper + di "* Need dvper file to count n children aged under 14 (for OECD equivalisation weight)" + + use "`efsd'/`y'/stata/dvper.dta", clear - use "`efsd'/`y'/stata/dvper.dta", clear * 2010 data = mixed/uppercase if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" { rename *, lower - } * keep only hrps keep if a003 == 1 + } + + * keep only hrps + keep if a003 == 1 * Ethnicity * NB a012 and a013 changed categories 2007->2008 to 0 = n/a, 1 = white, 2 = everyone else - * this still works, you can get detail from rawper.dta recode a012p (1=0) (2/max=1), gen(c_white) lab var c_white "Constraint: non-white HRP" lab def c_white 0 "White HRP" 1 "Non-white HRP" lab val c_white c_white * if HRPs are not classified - they will show up as missing + * this still works, you can get detail from rawper.dta + recode a012p (1=0) (2/max=1), gen(c_white) + lab var c_white "Constraint: non-white HRP" + lab def c_white 0 "White HRP" 1 "Non-white HRP" + lab val c_white c_white + * if HRPs are not classified - they will show up as missing * construct list of vars to keep * if dvper_keepvars is empty STATA will skip @@ -346,21 +548,24 @@ if `do_extracts' { di in red "`v' does not exist in `y' - will be missing" } } - keep case* c_white + keep case* c_white qui: compress - save "`efsd'/`y'/stata/dvper-temp.dta", replace - ****************************** - ************ * Now c_lli but this time need to collapse it so we count the number in the household with/out lli - * and count the number of children of various ages - use "`efsd'/`y'/stata/dvper.dta", clear + save "`efsd'/`y'/stata/dvper-temp.dta", replace + ****************************** + + ************ + * Now c_lli but this time need to collapse it so we count the number in the household with/out lli + * and count the number of children of various ages + + use "`efsd'/`y'/stata/dvper.dta", clear * 2010 onwards data = mixed/uppercase if "`y'" == "2010" | "`y'" == "2011" | "`y'" == "2012" | "`y'" == "2012" { rename *, lower } - + * assume if someone is eligible for incapacity benefit then = lli * even if not actually receiving * in rawper dvincap = a + b @@ -390,12 +595,18 @@ if `do_extracts' { keep case* ba_* c_* qui: compress save "`efsd'/`y'/stata/dvper-lli.dta", replace - di "*** MERGE ALL FILES for year = `y'" + + + di "*** MERGE ALL FILES for year = `y'" * - qui: merge case using /// "`efsd'/`y'/stata/dvhh-temp.dta" /// - "`efsd'/`y'/stata/rawper-temp.dta" /// "`efsd'/`y'/stata/dvper-temp.dta" /// - "`efsd'/`y'/stata/dvper-lli.dta" /// "`efsd'/`y'/stata/rawhh-temp.dta" , sort + + qui: merge case using /// + "`efsd'/`y'/stata/dvhh-temp.dta" /// + "`efsd'/`y'/stata/rawper-temp.dta" /// + "`efsd'/`y'/stata/dvper-temp.dta" /// + "`efsd'/`y'/stata/dvper-lli.dta" /// + "`efsd'/`y'/stata/rawhh-temp.dta" , sort su _merge* @@ -405,7 +616,8 @@ if `do_extracts' { erase "`efsd'/`y'/stata/dvper-temp.dta" erase "`efsd'/`y'/stata/dvper-lli.dta" erase "`efsd'/`y'/stata/rawhh-temp.dta" - gen survey_year = "`y'" + + gen survey_year = "`y'" tab survey_year aorder @@ -422,7 +634,22 @@ if `do_extracts' { gen ba_adults = g018 if g018 > 0 * ignore hhs with no adults (how many are there?) - /* 1st adult = .67 spouse = .33 other 2nd adult = .33 3rd adult = .33 subsequent adults = .33 children aged < 14 = .2 children aged 14+ = .33 */ * catch hh with no children replace ba_under14 = 0 if ba_under14 == . replace ba_over14 = 0 if ba_over14 == . gen oecd_equivbhcwt = 0.67 if ba_adults >= 1 replace oecd_equivbhcwt = oecd_equivbhcwt + ((ba_adults-1) * 0.33) + (ba_under14 * 0.2) + (ba_over14 * 0.33) di "*-> Calculating equiv income (OECD) and quartiles/deciles" + /* + 1st adult = .67 + spouse = .33 + other 2nd adult = .33 + 3rd adult = .33 + subsequent adults = .33 + children aged < 14 = .2 + children aged 14+ = .33 + */ + * catch hh with no children + replace ba_under14 = 0 if ba_under14 == . + replace ba_over14 = 0 if ba_over14 == . + + gen oecd_equivbhcwt = 0.67 if ba_adults >= 1 + replace oecd_equivbhcwt = oecd_equivbhcwt + ((ba_adults-1) * 0.33) + (ba_under14 * 0.2) + (ba_over14 * 0.33) + di "*-> Calculating equiv income (OECD) and quartiles/deciles" * p344, p389 & p396 changed to *p after 2006 and top coded (!) if `y' > 2005 { @@ -452,10 +679,17 @@ if `do_extracts' { * quarter labels changed in 2006 * old: - * 1 april to june * 2 june to september * 3 october to december * 4 january to march + * 1 april to june + * 2 june to september + * 3 october to december + * 4 january to march * new: - * a099: * 1 january to march * 2 april to june * 3 july to september * 4 october to december + * a099: + * 1 january to march + * 2 april to june + * 3 july to september + * 4 october to december gen ba_month = a055 @@ -471,7 +705,8 @@ if `do_extracts' { gen(ba_birth_cohort) * tab ba_birth_cohort c_age, mi - *********************** + + *********************** * End of per-year processing gen ba_quarter = -1 @@ -499,7 +734,8 @@ if `do_extracts' { qui: compress - save "`outd'/EFS-`y'-extract-BA.dta", replace } + save "`outd'/EFS-`y'-extract-BA.dta", replace + } } **************************** @@ -555,6 +791,6 @@ else { di "*-> do_extracts = `do_extracts', years not extracted so individual files not refreshed" } - di "*-> Job ended at $S_DATE" +di "*-> Job ended at $S_DATE" -log close +log close