Commit 5968a4f2 authored by Ben Anderson's avatar Ben Anderson
Browse files

added panel (xt) descriptives

parent 228ae8bf
......@@ -39,6 +39,7 @@ if `do_2014_desc' {
use "`proot'/NEED/End User Licence File 2014/UKDA-7518-stata11/stata11/need_eul_may2014.dta", clear
* distributions for 2012 (to test)
* processor intensive
local vars "Econs2012 Gcons2012"
local tvars "EE_BAND FLOOR_AREA_BAND PROP_AGE"
foreach v of local vars {
......@@ -56,6 +57,13 @@ if `do_long_desc' {
* Now use the pre-processed long form file which contains all years of consumption data but not the constant values (housing characteristics etc) which are in the xwave file
use "`dpath'/need_eul_may2014_longfile.dta", clear
* set as panel
xtset HH_ID year, delta(1 year)
xtdescribe
xtsum Econs Gcons
* summarise Electricity
table EconsValid year, c(count Econs min Econs mean Econs max Econs)
* summarise Gas
......
......@@ -35,7 +35,17 @@ local dpath "`proot'/NEED/End User Licence File 2014/"
local ifile "need_eul_may2014"
* original data file
local dfile_orig "`dpath'UKDA-7518-stata11/stata11/`ifile'.dta"
local version "v1"
* 10 = 10% sample, 50 = 50% sample, 100 = 100% sample
local sample 10
local samplet "`sample'pc"
local sampleby "EE_BAND PROP_TYPE"
local version "v1.1"
* includes production of % samples which maintain the original dimensions used to
* produce the EULF samples: EE_BAND PROP_TYPE
*local version "v1"
set more off
......@@ -43,17 +53,20 @@ log using "`dpath'/processed/process-NEED-EULF-2014-`version'-$S_DATE.smcl", rep
* use these locals to control what happens (set to 0 to skip the code)
* create codebook & some descriptives
local create_codebook = 1
local create_codebook = 0
* create wide form fixed file with (supposedly) unchanging data & a separate 'wide' consumption data file for cross-sectional analysis
local create_xwavefile = 1
* create long form file with wave (yearly) data - be careful, this takes a long time due to large memory use!
local create_longfile = 0
local create_longfile = 1
* load the original file
use "`dfile_orig'", clear
if `create_codebook' {
* create the codebook
* create original EULF codebook
* not much point running this for each % sample although the counts etc reported in the codebook won't match
log off main
log using "`dpath'/processed/codebook-NEED-EULF-2014-`version'-$S_DATE.smcl", replace name(cb)
use "`dfile_orig'", clear
log using "`dpath'/processed/codebook-NEED-EULF-2014-`version'-`sample'pc-$S_DATE.smcl", replace name(cb)
desc
di "** no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?"
codebook
......@@ -61,13 +74,21 @@ if `create_codebook' {
log on main
}
***** random sample ****
* select a random sample but ensure proportions of sampleby are kept
di "* Keeping `sample'% sample by `sampleby'"
sample `sample', by(`sampleby')
tab `sampleby', mi
if `create_xwavefile' {
* create the file with data that (notionally) doesn't change
use "`dfile_orig'", clear
* create a wide consumption file
preserve
keep HH_ID Gcons* Econs*
save "`dpath'/processed/`ifile'_consumptionfile_wide.dta", replace
save "`dpath'/processed/`ifile'_consumptionfile_wide_`samplet'.dta", replace
restore
drop Gcons* Econs*
......@@ -114,12 +135,12 @@ if `create_xwavefile' {
compress
log off main
log using "`dpath'/processed/codebook-NEED-EULF-2014-`ifile'_xwavefile-`version'-$S_DATE.smcl", replace name(cb_xwave)
log using "`dpath'/processed/codebook-NEED-EULF-2014-`ifile'_xwavefile-`version'-`samplet'-$S_DATE.smcl", replace name(cb_xwave)
desc
di "** no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?"
di "** urban/rural would be helpful"
codebook
save "`dpath'/processed/`ifile'_xwavefile.dta", replace
save "`dpath'/processed/`ifile'_xwavefile_`samplet'.dta", replace
log close cb_xwave
log on main
}
......@@ -127,7 +148,7 @@ if `create_xwavefile' {
if `create_longfile' {
* create the long file with as few vars as possible (quicker)
* still takes a while...
* NOTE(review): the dfile_orig local is defined earlier with a trailing ".dta" and is used
* elsewhere in this file without an extra suffix; the path below therefore ends in
* ".dta.dta" - confirm whether the extra suffix is intended
use "`dfile_orig'.dta", clear
* keep only the household id and the gas/electricity consumption variables
keep HH_ID Gcons* Econs*
* panel vars:
......@@ -143,24 +164,24 @@ if `create_longfile' {
drop `v'`y'Valid
}
}
* this takes a LONG time - avoid running many times!
* this takes a LONG time for the full dataset
reshape long Gcons GconsValid Econs EconsValid, i(HH_ID)
rename _j year
* set as panel
xtset HH_ID year, delta(1 year)
xtset HH_ID year
compress
save "`dpath'/processed/`ifile'_consumptionfile_long.dta", replace
save "`dpath'/processed/`ifile'_consumptionfile_long_`samplet'.dta", replace
}
/*
* THIS TAKES AGES and creates a 1.5 GB file - use with care
* THIS TAKES AGES and creates a 1.5 GB file for the full dataset - use with care
* now just merge them
* start with long file which may or may not have just been re-created
use "`dpath'/`dfile'_consumptionfile_long.dta", clear
merge m:1 HH_ID using "`dpath'/`dfile'_xwavefile.dta"
save "`dpath'/`dfile'_consumptionfile_long_complete.dta", replace
save "`dpath'/`dfile'_consumptionfile_long_complete_`samplet'.dta", replace
*/
log close _all
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment