Skip to content
Snippets Groups Projects
Commit 0762906c authored by Ben Anderson's avatar Ben Anderson
Browse files

fixed processing script to give random samples

Set the % you want using the ‘sample’ local. By default this
constrains the selection to keep the overall ratios of EE_BAND and
PROP_TYPE constant (as in the original sampling).
parent 5968a4f2
No related branches found
No related tags found
No related merge requests found
...@@ -88,61 +88,66 @@ if `create_xwavefile' { ...@@ -88,61 +88,66 @@ if `create_xwavefile' {
* create a wide consumption file * create a wide consumption file
preserve preserve
keep HH_ID Gcons* Econs* keep HH_ID Gcons* Econs*
compress
save "`dpath'/processed/`ifile'_consumptionfile_wide_`samplet'.dta", replace save "`dpath'/processed/`ifile'_consumptionfile_wide_`samplet'.dta", replace
restore restore
drop Gcons* Econs*
* fix some mis-codings (or lack of coding of missing)
local vars "E7Flag2012 CWI LI"
foreach v of local vars {
destring `v', force replace
replace `v' = 0 if `v' !=1
label def `v' 0 "No or N/A" 1 "Yes"
label val `v' `v'
}
* turn '99' into missing - ideally missing should be -99 to aid re-coding and avoid unpleasant surprises in naive analysis!
replace FP_ENG = . if LOFT_DEPTH == 99
replace LOFT_DEPTH = . if LOFT_DEPTH == 99
* what do G, L, M mean in the gas 'valid' variables - presumably 0 = off gas & V = valid?
tabstat IMD_WALES, by(REGION) s(mean min max n) preserve
* there seem to be some welsh LSOAs allocated to English GORs? drop Gcons* Econs*
* fix some mis-codings (or lack of coding of missing)
tabstat IMD_ENG, by(REGION) s(mean min max n) local vars "E7Flag2012 CWI LI"
* there seem to be some English LSOAs allocated to Wales? foreach v of local vars {
destring `v', force replace
tabstat FP_ENG, by(REGION) replace `v' = 0 if `v' !=1
* REGION is ONS admin codes label def `v' 0 "No or N/A" 1 "Yes"
* create a new variable with meaningful labels label val `v' `v'
gen ba_region = 1 if REGION == "E12000001" }
replace ba_region = 2 if REGION == "E12000002"
replace ba_region = 3 if REGION == "E12000003" * turn '99' into missing - ideally missing should be -99 to aid re-coding and avoid unpleasant surprises in naive analysis!
replace ba_region = 4 if REGION == "E12000004" replace FP_ENG = . if LOFT_DEPTH == 99
replace ba_region = 5 if REGION == "E12000005" replace LOFT_DEPTH = . if LOFT_DEPTH == 99
replace ba_region = 6 if REGION == "E12000006"
replace ba_region = 7 if REGION == "E12000007" * what do G, L, M mean in the gas 'valid' variables - presumably 0 = off gas & V = valid?
replace ba_region = 8 if REGION == "E12000008"
replace ba_region = 9 if REGION == "E12000009" tabstat IMD_WALES, by(REGION) s(mean min max n)
replace ba_region = 10 if REGION == "W99999999" * there seem to be some welsh LSOAs allocated to English GORs?
lab var ba_region "former Govt Office region (labelled)" tabstat IMD_ENG, by(REGION) s(mean min max n)
* http://www.ons.gov.uk/ons/guide-method/geography/beginner-s-guide/administrative/england/government-office-regions/index.html * there seem to be some English LSOAs allocated to Wales?
lab def ba_region 1 "North East" 2 "North West" 3 "Yorkshire & The Humber" 4 "East Midlands" ///
5 "West Midlands" 6 "East of England" 7 "London" 8 "South East" 9 "South West" 10 "Wales"
lab val ba_region ba_region
compress tabstat FP_ENG, by(REGION)
log off main * REGION is ONS admin codes
log using "`dpath'/processed/codebook-NEED-EULF-2014-`ifile'_xwavefile-`version'-`samplet'-$S_DATE.smcl", replace name(cb_xwave) * create a new variable with meaningful labels
desc gen ba_region = 1 if REGION == "E12000001"
di "** no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?" replace ba_region = 2 if REGION == "E12000002"
di "** urban/rural would be helpful" replace ba_region = 3 if REGION == "E12000003"
codebook replace ba_region = 4 if REGION == "E12000004"
save "`dpath'/processed/`ifile'_xwavefile_`samplet'.dta", replace replace ba_region = 5 if REGION == "E12000005"
log close cb_xwave replace ba_region = 6 if REGION == "E12000006"
log on main replace ba_region = 7 if REGION == "E12000007"
replace ba_region = 8 if REGION == "E12000008"
replace ba_region = 9 if REGION == "E12000009"
replace ba_region = 10 if REGION == "W99999999"
lab var ba_region "former Govt Office region (labelled)"
* http://www.ons.gov.uk/ons/guide-method/geography/beginner-s-guide/administrative/england/government-office-regions/index.html
lab def ba_region 1 "North East" 2 "North West" 3 "Yorkshire & The Humber" 4 "East Midlands" ///
5 "West Midlands" 6 "East of England" 7 "London" 8 "South East" 9 "South West" 10 "Wales"
lab val ba_region ba_region
compress
log off main
log using "`dpath'/processed/codebook-NEED-EULF-2014-`ifile'_xwavefile-`version'-`samplet'-$S_DATE.smcl", replace name(cb_xwave)
desc
di "** no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?"
di "** urban/rural would be helpful"
codebook
compress
save "`dpath'/processed/`ifile'_xwavefile_`samplet'.dta", replace
log close cb_xwave
log on main
restore
} }
if `create_longfile' { if `create_longfile' {
...@@ -169,11 +174,16 @@ if `create_longfile' { ...@@ -169,11 +174,16 @@ if `create_longfile' {
rename _j year rename _j year
* set as panel * set as panel
xtset HH_ID year xtset HH_ID year
di "* check distributions for `samplet' sample"
xtdescribe
xtsum Gcons Econs
compress compress
save "`dpath'/processed/`ifile'_consumptionfile_long_`samplet'.dta", replace save "`dpath'/processed/`ifile'_consumptionfile_long_`samplet'.dta", replace
* this leaves us with the long form file in memory
} }
/* /*
* Link xwave data to long form file
* THIS TAKES AGES and creates a 1.5 GB file for the full dataset - use with care * THIS TAKES AGES and creates a 1.5 GB file for the full dataset - use with care
* now just merge them * now just merge them
* start with long file which may or may not have just been re-created * start with long file which may or may not have just been re-created
...@@ -184,4 +194,6 @@ merge m:1 HH_ID using "`dpath'/`dfile'_xwavefile.dta" ...@@ -184,4 +194,6 @@ merge m:1 HH_ID using "`dpath'/`dfile'_xwavefile.dta"
save "`dpath'/`dfile'_consumptionfile_long_complete_`samplet'.dta", replace save "`dpath'/`dfile'_consumptionfile_long_complete_`samplet'.dta", replace
*/ */
* done!
log close _all log close _all
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment