From 0762906c56d62f09307ada0ab24961b51ffcd4ef Mon Sep 17 00:00:00 2001
From: Ben Anderson <b.anderson@soton.ac.uk>
Date: Fri, 10 Oct 2014 17:06:24 +0100
Subject: [PATCH] fixed processing script to give random samples
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

set the % you want using the ‘sample’ local. By default this constrains
the selection to keep the overall ratios of EE_BAND PROP_TYPE constant
(as in the original sampling)
---
Review note (not part of the commit message): in the added block the FP_ENG
recode now tests FP_ENG == 99. The pre-image tested LOFT_DEPTH == 99, which
blanked FP_ENG for households with unknown loft depth and left any FP_ENG
code of 99 in place. Line breaks, indentation and blank context lines were
rebuilt from a whitespace-collapsed copy; check them against the target
file before applying.

 NEED/process-NEED-EULF-2014.do | 112 ++++++++++++++++++---------------
 1 file changed, 62 insertions(+), 50 deletions(-)

diff --git a/NEED/process-NEED-EULF-2014.do b/NEED/process-NEED-EULF-2014.do
index c182c73..177dfe3 100644
--- a/NEED/process-NEED-EULF-2014.do
+++ b/NEED/process-NEED-EULF-2014.do
@@ -88,61 +88,66 @@ if `create_xwavefile' {
 	* create a wide consumption file
 	preserve
 		keep HH_ID Gcons* Econs*
+		compress
 		save "`dpath'/processed/`ifile'_consumptionfile_wide_`samplet'.dta", replace
 	restore
 
 
-	drop Gcons* Econs*
-	* fix some mis-codings (or lack of coding of missing)
-	local vars "E7Flag2012 CWI LI"
-	foreach v of local vars {
-		destring `v', force replace
-		replace `v' = 0 if `v' !=1
-		label def `v' 0 "No or N/A" 1 "Yes"
-		label val `v' `v'
-	}
-
-	* turn '99' into missing - ideally missing should be -99 to aid re-coding and avoid unpleasant surprises in naive analysis!
-	replace FP_ENG = . if LOFT_DEPTH == 99
-	replace LOFT_DEPTH = . if LOFT_DEPTH == 99
-
-	* what do G, L, M mean in the gas 'valid' variables - presumably 0 = off gas & V = valid?
-	tabstat IMD_WALES, by(REGION) s(mean min max n)
-	* there seem to be some welsh LSOAs allocated to English GORs?
-
-	tabstat IMD_ENG, by(REGION) s(mean min max n)
-	* there seem to be some English LSOAs allocated to Wales?
-
-	tabstat FP_ENG, by(REGION)
-	* REGION is ONS admin codes
-	* create a new variable with meaningful labels
-	gen ba_region = 1 if REGION == "E12000001"
-	replace ba_region = 2 if REGION == "E12000002"
-	replace ba_region = 3 if REGION == "E12000003"
-	replace ba_region = 4 if REGION == "E12000004"
-	replace ba_region = 5 if REGION == "E12000005"
-	replace ba_region = 6 if REGION == "E12000006"
-	replace ba_region = 7 if REGION == "E12000007"
-	replace ba_region = 8 if REGION == "E12000008"
-	replace ba_region = 9 if REGION == "E12000009"
-	replace ba_region = 10 if REGION == "W99999999"
-
-	lab var ba_region "former Govt Office region (labelled)"
-	* http://www.ons.gov.uk/ons/guide-method/geography/beginner-s-guide/administrative/england/government-office-regions/index.html
-	lab def ba_region 1 "North East" 2 "North West" 3 "Yorkshire & The Humber" 4 "East Midlands" ///
-		5 "West Midlands" 6 "East of England" 7 "London" 8 "South East" 9 "South West" 10 "Wales"
-	lab val ba_region ba_region
+	preserve
+		drop Gcons* Econs*
+		* fix some mis-codings (or lack of coding of missing)
+		local vars "E7Flag2012 CWI LI"
+		foreach v of local vars {
+			destring `v', force replace
+			replace `v' = 0 if `v' !=1
+			label def `v' 0 "No or N/A" 1 "Yes"
+			label val `v' `v'
+		}
+
+		* turn '99' into missing - ideally missing should be -99 to aid re-coding and avoid unpleasant surprises in naive analysis!
+		replace FP_ENG = . if FP_ENG == 99
+		replace LOFT_DEPTH = . if LOFT_DEPTH == 99
+
+		* what do G, L, M mean in the gas 'valid' variables - presumably 0 = off gas & V = valid?
+
+		tabstat IMD_WALES, by(REGION) s(mean min max n)
+		* there seem to be some welsh LSOAs allocated to English GORs?
+
+		tabstat IMD_ENG, by(REGION) s(mean min max n)
+		* there seem to be some English LSOAs allocated to Wales?
 
-	compress
-	log off main
-	log using "`dpath'/processed/codebook-NEED-EULF-2014-`ifile'_xwavefile-`version'-`samplet'-$S_DATE.smcl", replace name(cb_xwave)
-	desc
-	di "** no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?"
-	di "** urban/rural would be helpful"
-	codebook
-	save "`dpath'/processed/`ifile'_xwavefile_`samplet'.dta", replace
-	log close cb_xwave
-	log on main
+		tabstat FP_ENG, by(REGION)
+		* REGION is ONS admin codes
+		* create a new variable with meaningful labels
+		gen ba_region = 1 if REGION == "E12000001"
+		replace ba_region = 2 if REGION == "E12000002"
+		replace ba_region = 3 if REGION == "E12000003"
+		replace ba_region = 4 if REGION == "E12000004"
+		replace ba_region = 5 if REGION == "E12000005"
+		replace ba_region = 6 if REGION == "E12000006"
+		replace ba_region = 7 if REGION == "E12000007"
+		replace ba_region = 8 if REGION == "E12000008"
+		replace ba_region = 9 if REGION == "E12000009"
+		replace ba_region = 10 if REGION == "W99999999"
+
+		lab var ba_region "former Govt Office region (labelled)"
+		* http://www.ons.gov.uk/ons/guide-method/geography/beginner-s-guide/administrative/england/government-office-regions/index.html
+		lab def ba_region 1 "North East" 2 "North West" 3 "Yorkshire & The Humber" 4 "East Midlands" ///
+			5 "West Midlands" 6 "East of England" 7 "London" 8 "South East" 9 "South West" 10 "Wales"
+		lab val ba_region ba_region
+
+		compress
+		log off main
+		log using "`dpath'/processed/codebook-NEED-EULF-2014-`ifile'_xwavefile-`version'-`samplet'-$S_DATE.smcl", replace name(cb_xwave)
+		desc
+		di "** no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?"
+		di "** urban/rural would be helpful"
+		codebook
+		compress
+		save "`dpath'/processed/`ifile'_xwavefile_`samplet'.dta", replace
+		log close cb_xwave
+		log on main
+	restore
 }
 
 if `create_longfile' {
@@ -169,11 +174,16 @@ if `create_longfile' {
 	rename _j year
 	* set as panel
 	xtset HH_ID year
+	di "* check distributions for `samplet' sample"
+	xtdescribe
+	xtsum Gcons Econs
 	compress
 	save "`dpath'/processed/`ifile'_consumptionfile_long_`samplet'.dta", replace
+	* this leaves us with the long form file in memory
 }
 
 /*
+* Link xwave data to long form file
 * THIS TAKES AGES and creates a 1.5 GB file for the full dataset - use with care
 * now just merge them
 * start with long file which may or may not have just been re-created
@@ -184,4 +194,6 @@ merge m:1 HH_ID using "`dpath'/`dfile'_xwavefile.dta"
 save "`dpath'/`dfile'_consumptionfile_long_complete_`samplet'.dta", replace
 */
 
+* done!
+
 log close _all
-- 
GitLab