diff --git a/UK-IPS-time-series-extract.do b/UK-IPS-time-series-extract.do index 1047c11388a35dc53635105d163deceee1ab97b0..510d7866ddba8fad6d00e67391461198045328f5 100644 --- a/UK-IPS-time-series-extract.do +++ b/UK-IPS-time-series-extract.do @@ -45,8 +45,10 @@ set more off * set these to what works for you -local place = "/Users/ben/Documents/Work" local ipsd = "`place'/Data/Social Science Datatsets/UK International Passenger Survey" -local logd = "`ipsd'/log_files" local outd = "`ipsd'/processed" +local place = "~/Documents/Work" +local ipsd = "`place'/Data/Social Science Datatsets/UK International Passenger Survey" +local logd = "`ipsd'/log_files" +local outd = "`ipsd'/processed" ************************ * NB - the script assumes a certain folder structure for the source IPS data like so: @@ -83,7 +85,7 @@ local qcont_keepvars = "year qtr month serial flow age purp fweight" * set logging capture log close log using "`logd'/UK-IPS-time-series-extract-$S_DATE.smcl", replace - clear all +clear all * increase default number of variables allowed set maxvar 10000, perm @@ -95,7 +97,7 @@ di "* It will keep the following variables:" di "* `keepvars' " di "* This could take some time. " di "* I suggest you check it is running and then get a cup of tea...." -di "*******************************************************" +di "*******************************************************" if `do_extracts' { di "do_extracts = `do_extracts', all years (`years') to be extracted and refreshed" @@ -103,8 +105,10 @@ if `do_extracts' { foreach y of local do_years { di "* * * * * * * * * " di "* -> Processing `y'" - ****************************** - * qcount di "* qcount file" + + ****************************** + * qcount + di "* qcount file" local pref = "qcont" local suff "`y'" @@ -140,33 +144,62 @@ if `do_extracts' { * in some years there are 4 quarterly files if "`y'" == "01" | "`y'" == "02" | "`y'" == "03" | "`y'" == "04" | "`y'" == "05" | "`y'" == "06" | "`y'" == "09" | "`y'" == "12" | "`y'" == "14" { local qs "1 2 3 4" - foreach n of local qs { use "`ipsd'/20`y'/stata/`pref'`n'`suff'.dta", clear * make everything lower case + foreach n of local qs { + use "`ipsd'/20`y'/stata/`pref'`n'`suff'.dta", clear + * make everything lower case rename *, lower - * save all vars - process later save "`outd'/tmp/qcont`n'`y'-temp.dta", replace ****************************** + * save all vars - process later + save "`outd'/tmp/qcont`n'`y'-temp.dta", replace + ****************************** } di "*** APPEND ALL FILES for year = `y'" * - clear qui: append using /// "`outd'/tmp/qcont1`y'-temp.dta" /// - "`outd'/tmp/qcont2`y'-temp.dta" /// "`outd'/tmp/qcont3`y'-temp.dta" /// - "`outd'/tmp/qcont4`y'-temp.dta" + clear + qui: append using /// + "`outd'/tmp/qcont1`y'-temp.dta" /// + "`outd'/tmp/qcont2`y'-temp.dta" /// + "`outd'/tmp/qcont3`y'-temp.dta" /// + "`outd'/tmp/qcont4`y'-temp.dta" * DELETE TEMPORARY FILES - foreach n of local qs { + foreach n of local qs { erase "`outd'/tmp/qcont`n'`y'-temp.dta" - } } else { + } + + } + else { use "`ipsd'/20`y'/stata/`pref'`suff'.dta", clear * make everything lower case rename *, lower } - ** sex gen ba_gender = 0 if sex == 1 replace ba_gender = 1 if sex == 2 - replace ba_gender = . if sex == 9 lab var ba_gender "Gender of HRP" lab def ba_gender 0 "Male" 1 "Female" lab val ba_gender ba_gender ** age - recode age (0=0) (2=1) (4 = 2) (5 = 3) (6 = 4) /// (7 = 5) (8 = 6) (else=.), gen(ba_age) // codes don't know & youth group parties to missing lab var ba_age "Age group (individual)" - label define ba_age /// 0 "0-15" /// 1 "16-24" /// 2 "25-34" /// 3 "35-44" /// 4 "45-54" /// 5 "55-64" /// 6 "65+" lab val ba_age ba_age * check + ** sex + gen ba_gender = 0 if sex == 1 + replace ba_gender = 1 if sex == 2 + replace ba_gender = . if sex == 9 + lab var ba_gender "Gender of HRP" + lab def ba_gender 0 "Male" 1 "Female" + lab val ba_gender ba_gender + + ** age + recode age (0=0) (2=1) (4 = 2) (5 = 3) (6 = 4) /// + (7 = 5) (8 = 6) (else=.), gen(ba_age) // codes don't know & youth group parties to missing + lab var ba_age "Age group (individual)" + label define ba_age /// + 0 "0-15" /// + 1 "16-24" /// + 2 "25-34" /// + 3 "35-44" /// + 4 "45-54" /// + 5 "55-64" /// + 6 "65+" + + lab val ba_age ba_age + * check *tab age ba_age, mi /* @@ -228,7 +261,7 @@ if `do_extracts' { keep `kvars' ba_* qui: compress - + gen survey_year = "20`y'" tab survey_year @@ -236,9 +269,15 @@ if `do_extracts' { * keep all (makes life easier below as some vars are in some years and not others) /* create a birth cohort variable - Birth year Age in 2000 Age in 2010 1900-1909 90-100 1910-1919 80-90 90-100 1920-1929 70-80 80-90 1930-1939 60-70 70-80 1940-1949 50-60 60-70 1950-1959 40-50 50-60 + Birth year Age in 2000 Age in 2010 + 1900-1909 90-100 + 1910-1919 80-90 90-100 + 1920-1929 70-80 80-90 + 1930-1939 60-70 70-80 + 1940-1949 50-60 60-70 + 1950-1959 40-50 50-60 1960-1969 30-40 40-50 - */ + */ * remember these are based on age groups * set the minimum age they could be recode ba_age (0=0) (1=16) (2=25) (3=35) (4=45) (5=55) (6=65), gen(ba_min_age) @@ -256,7 +295,8 @@ if `do_extracts' { drop ba_birthyear ba_min_age qui: compress save "`outd'/IPS-20`y'-extract-BA.dta", replace - local count = "`count' + 1" } + local count = "`count' + 1" + } } **************************** @@ -295,6 +335,6 @@ else { di "*-> do_extracts = `do_extracts', years not extracted so individual files not refreshed" } - di "*-> Job ended at $S_DATE" +di "*-> Job ended at $S_DATE" -log close +log close