Commit f94c500b authored by Ben Anderson's avatar Ben Anderson
Browse files

changed flow so each subsection has own log and added analysis of excluded cases

parent 53b1dd3f
......@@ -35,11 +35,8 @@ GNU General Public License for more details.
* Requires: estout
clear all
capture noisily log close
set more off
* written for Mac OSX - remember to change filesystem delimiter for other platforms
......@@ -47,11 +44,16 @@ global home "~"
global dpath "$home/Documents/Work/Data/Social Science Datasets/DECC/NEED/End User Licence File 2014/processed"
global rpath "$home/Dropbox/RSS-2015/results"
*global rpath "$home/Dropbox/RSS-2015/NEED/results"
global rpath "$home/Documents/Work/Papers and Conferences/RSS-2015/NEED/results"
local version "v1"
local version "v1" // version management via github
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace
capture noisily log close _all
* start main log
* each subsection has own log
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_main.smcl", replace name(main)
* set sample
* 100 = 100pc
......@@ -69,7 +71,8 @@ local need_weight = "[pw = WEIGHT]" // use probability weights
local do_desc = 0 // do descriptives
local do_histo = 0 // toggle graph drawing
local do_box = 0 // toggle graph drawing
local do_xsec = 1 // run cross-sectional analysis
local do_excl = 0 // tests for correlates with exclusion
local do_xsec = 0 // run cross-sectional analysis
local do_longit = 1 // run longitudinal analysis
......@@ -111,13 +114,19 @@ lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,0
* set up correct long form 'is X present in year' using year (if known)
local vars "BOILER LI CWI"
* what will hapen if there are multiple replacements in a household
* what will hapen if there are multiple replacements in a household?
foreach v of local vars {
gen ba_have_`v' = 0
destring `v'_YEAR, force replace
replace ba_have_`v' = 1 if `v'_YEAR <= year
}
* make sure test vars are destringed
local tvars "IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER"
foreach tv of local tvars {
destring `tv', force replace
}
* labels
lab var Econs "Electricity (kWh/year)"
lab var Gcons "Gas (kWh/year)"
......@@ -140,6 +149,13 @@ foreach v of local setupvars {
lab val `v'Validr `v'Validr
tab `v'Validr `v'Valid
gen `v'_excl_hi = 0 if `v'Validr == 1 // valid
replace `v'_excl_hi = 1 if `v'Validr == 4 // high
gen `v'_excl_lo = 0 if `v'Validr == 1 // valid
replace `v'_excl_lo = 1 if `v'Validr == 3 // low
* set up consumption deciles and outlier flags
gen u99_`v' = 0
gen l99_`v' = 0
......@@ -149,7 +165,7 @@ foreach v of local setupvars {
levelsof(year), local(levels)
foreach l of local levels {
di "* Calculating consumption deciles and outlier flags for `v' for `l'"
* creates missing for other years have to do this as egen does not allow by & can't 'replace'
di "* `v' deciles for `l'"
egen `v'_dec_`l' = cut(`v') if year == `l', group(10)
......@@ -173,6 +189,7 @@ foreach v of local setupvars {
*tab m90_`v' if year == `l', mi
}
* now combine the deciles - set missing option otherwise it counts a row where all are missing as 0
egen `v'_dec = rowtotal(`v'_dec_*), missing
* remove temporary ones
......@@ -190,9 +207,6 @@ foreach v of local setupvars {
lab var log`v' "Log `v'"
}
* set 'survey' weight
svyset `need_weight'
* flag dwellings which are off gas for electricity
* NB - in this dataset we don't know if they use electricity as main heat (could be oil)
gen ba_off_gas = 0
......@@ -200,19 +214,25 @@ replace ba_off_gas = 1 if GconsValidr == 2
lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)"
lab val ba_off_gas ba_off_gas
* check
svy: mean Gcons Econs, over(ba_off_gas)
di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
* add Gcons to loop over gas
local descvars "Econs"
if `do_desc' {
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'-do_desc.smcl", replace name(do_desc)
* set 'survey' weight
svyset `need_weight'
* check
svy: mean Gcons Econs, over(ba_off_gas)
di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
table ba_off_gas MAIN_HEAT_FUEL `need_weight', missing // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
table ba_off_gas MAIN_HEAT_FUEL `need_weight', missing // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas)
* roughly constant rate throughout years
table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) c(mean Gcons n Gcons)
* but off gas have no gas readings as you'd expect (DECC applied filter)
table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas)
* roughly constant rate throughout years
table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) c(mean Gcons n Gcons)
* but off gas have no gas readings as you'd expect (DECC applied filter)
local descvars "Econs"
if `do_desc' {
foreach v of local descvars {
di "***************"
di "* Testing `v' for `sample'% sample"
......@@ -268,26 +288,72 @@ if `do_desc' {
graph export "$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_yr_ee_valid.png", replace
}
di "* check the distributions of the outliers"
local tvars "MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG"
foreach tv of local tvars {
di "* Checking top 1% against `tv'"
tab `tv' u99_Econs , col
}
}
log close do_desc
log on main
}
if `do_excl' {
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_excl.smcl", replace name(do_excl)
di "* running exclusion analysis"
di "* check the distributions of the outliers"
di "* % excluded as too high"
tab year Econs_excl_hi
di "* % excluded as too low"
tab year Econs_excl_lo
di "* exclusion models"
local tvars "MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG"
foreach tv of local tvars {
di "* Checking Econs lowest 1% (+L) against `tv'"
tab `tv' u99_Econs , col
di "* Checking Econs highest 1% (+G) against `tv'"
tab `tv' u99_Econs , col
di "* Checking Econs L against `tv'"
tab `tv' Econs_excl_lo , col
di "* Checking Econs G against `tv'"
tab `tv' Econs_excl_hi , col
}
levelsof(year), local(years)
foreach y of local years {
di "* testing exclusions for `y'"
* use capture to avoid models failing where no exclusions (2010 ->)
capture noisily {
di "* Hi"
qui: logit Econs_excl_hi i.E7Flag2012 i.MAIN_HEAT_FUEL i.PROP_AGE ///
i.PROP_TYPE i.FLOOR_AREA_BAND i.EE_BAND ba_off_gas ///
if year == `y'
estat gof
est store Econs_excl_hi_`y'
}
capture noisily {
di "* Lo"
qui: logit Econs_excl_lo i.E7Flag2012 i.MAIN_HEAT_FUEL i.PROP_AGE ///
i.PROP_TYPE i.FLOOR_AREA_BAND i.EE_BAND ba_off_gas ///
if year == `y'
estat gof
est store Econs_excl_lo_`y'
}
}
estout Econs_excl_hi* using "$rpath/logit_Econs_excl_hi_`version'.txt", ///
cells("b se ci_u ci_l _star") ///
stats(r2_p ll N) replace
estout Econs_excl_lo* using "$rpath/logit_Econs_excl_lo_`version'.txt", ///
cells("b se ci_u ci_l _star") ///
stats(r2_p ll N) replace
log close do_excl
log on main
}
if `do_xsec' {
di "* Running cross sectional analysis using 2012 (all valid cases)"
* make sure test vars are destringed
local tvars "IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER"
foreach tv of local tvars {
destring `tv', force replace
}
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_xsec.smcl", replace name(do_xsec)
di "* Running cross sectional analysis for electricity using 2012 (all valid cases)"
* kitchen sink model - use 99% signif level
* use vce(robust) as running without & using hettest suggests heteroscedasticity
* regress would throw out collinear variables
......@@ -309,11 +375,11 @@ if `do_xsec' {
* Plot residuals
* pnormal plot of residuals
pnorm logEconsr, name(pnorm_logEconsr)
graph export "$rpath/graphs/pnorm_logEconsr.png", replace
graph export "$rpath/graphs/pnorm_logEconsr_2012.png", replace
* qnormal plot
qnorm logEconsr, name(qnorm_logEconsr)
graph export "$rpath/graphs/qnorm_logEconsr.png", replace
graph export "$rpath/graphs/qnorm_logEconsr_2012.png", replace
* test the null hypothesis that the variance of the residuals is homogenous.
* Therefore, if the p-value is very small, we would have to reject the hypothesis
......@@ -348,10 +414,21 @@ if `do_xsec' {
* test BIC etc
estat ic
* preduce margins plot for floor area
margins i.FLOOR_AREA_BAND
marginsplot
graph box Econs if year == 2012, over(FLOOR_AREA_BAND)
graph export "$rpath/graphs/box_Econs_floor_area_2012.png", replace
log close do_xsec
log on main
}
if `do_longit' {
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_longit.smcl", replace name(do_longit)
di "* Running longitudinal analysis"
di "* Check boiler transitions"
xttrans ba_have_BOILER, freq
......@@ -415,8 +492,10 @@ if `do_longit' {
est store xtr_re_logEcons
estout xtr_re_logEcons using "$rpath/xtr_re_logEcons_`version'.txt", cells("b se ci_u ci_l _star ") stats(r2_w r2_b r2_o rmse N sigma_u sigma_e, fmt(%9.3f %9.0g)) replace
}
log close do_longit
log on main
}
di "* Done!"
log close
log close main
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment