Skip to content
Snippets Groups Projects
Commit f94c500b authored by Ben Anderson's avatar Ben Anderson
Browse files

changed flow so each subsection has own log and added analysis of excluded cases

parent 53b1dd3f
No related branches found
No related tags found
No related merge requests found
......@@ -35,11 +35,8 @@ GNU General Public License for more details.
* Requires: estout
clear all
capture noisily log close
set more off
* written for Mac OSX - remember to change filesystem delimiter for other platforms
......@@ -47,11 +44,16 @@ global home "~"
global dpath "$home/Documents/Work/Data/Social Science Datasets/DECC/NEED/End User Licence File 2014/processed"
global rpath "$home/Dropbox/RSS-2015/results"
*global rpath "$home/Dropbox/RSS-2015/NEED/results"
global rpath "$home/Documents/Work/Papers and Conferences/RSS-2015/NEED/results"
local version "v1"
local version "v1" // version management via github
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace
capture noisily log close _all
* start main log
* each subsection has own log
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_main.smcl", replace name(main)
* set sample
* 100 = 100pc
......@@ -69,7 +71,8 @@ local need_weight = "[pw = WEIGHT]" // use probability weights
local do_desc = 0 // do descriptives
local do_histo = 0 // toggle graph drawing
local do_box = 0 // toggle graph drawing
local do_xsec = 1 // run cross-sectional analysis
local do_excl = 0 // tests for correlates with exclusion
local do_xsec = 0 // run cross-sectional analysis
local do_longit = 1 // run longitudinal analysis
......@@ -111,13 +114,19 @@ lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,0
* set up correct long form 'is X present in year' using year (if known)
local vars "BOILER LI CWI"
* what will hapen if there are multiple replacements in a household
* what will hapen if there are multiple replacements in a household?
foreach v of local vars {
gen ba_have_`v' = 0
destring `v'_YEAR, force replace
replace ba_have_`v' = 1 if `v'_YEAR <= year
}
* make sure test vars are destringed
local tvars "IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER"
foreach tv of local tvars {
destring `tv', force replace
}
* labels
lab var Econs "Electricity (kWh/year)"
lab var Gcons "Gas (kWh/year)"
......@@ -140,6 +149,13 @@ foreach v of local setupvars {
lab val `v'Validr `v'Validr
tab `v'Validr `v'Valid
gen `v'_excl_hi = 0 if `v'Validr == 1 // valid
replace `v'_excl_hi = 1 if `v'Validr == 4 // high
gen `v'_excl_lo = 0 if `v'Validr == 1 // valid
replace `v'_excl_lo = 1 if `v'Validr == 3 // low
* set up consumption deciles and outlier flags
gen u99_`v' = 0
gen l99_`v' = 0
......@@ -149,7 +165,7 @@ foreach v of local setupvars {
levelsof(year), local(levels)
foreach l of local levels {
di "* Calculating consumption deciles and outlier flags for `v' for `l'"
* creates missing for other years have to do this as egen does not allow by & can't 'replace'
di "* `v' deciles for `l'"
egen `v'_dec_`l' = cut(`v') if year == `l', group(10)
......@@ -173,6 +189,7 @@ foreach v of local setupvars {
*tab m90_`v' if year == `l', mi
}
* now combine the deciles - set missing option otherwise it counts a row where all are missing as 0
egen `v'_dec = rowtotal(`v'_dec_*), missing
* remove temporary ones
......@@ -190,9 +207,6 @@ foreach v of local setupvars {
lab var log`v' "Log `v'"
}
* set 'survey' weight
svyset `need_weight'
* flag dwellings which are off gas for electricity
* NB - in this dataset we don't know if they use electricity as main heat (could be oil)
gen ba_off_gas = 0
......@@ -200,19 +214,25 @@ replace ba_off_gas = 1 if GconsValidr == 2
lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)"
lab val ba_off_gas ba_off_gas
* check
svy: mean Gcons Econs, over(ba_off_gas)
di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
* add Gcons to loop over gas
local descvars "Econs"
if `do_desc' {
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'-do_desc.smcl", replace name(do_desc)
* set 'survey' weight
svyset `need_weight'
* check
svy: mean Gcons Econs, over(ba_off_gas)
di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
table ba_off_gas MAIN_HEAT_FUEL `need_weight', missing // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
table ba_off_gas MAIN_HEAT_FUEL `need_weight', missing // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas)
* roughly constant rate throughout years
table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) c(mean Gcons n Gcons)
* but off gas have no gas readings as you'd expect (DECC applied filter)
table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas)
* roughly constant rate throughout years
table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) c(mean Gcons n Gcons)
* but off gas have no gas readings as you'd expect (DECC applied filter)
local descvars "Econs"
if `do_desc' {
foreach v of local descvars {
di "***************"
di "* Testing `v' for `sample'% sample"
......@@ -268,26 +288,72 @@ if `do_desc' {
graph export "$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_yr_ee_valid.png", replace
}
di "* check the distributions of the outliers"
local tvars "MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG"
foreach tv of local tvars {
di "* Checking top 1% against `tv'"
tab `tv' u99_Econs , col
}
}
log close do_desc
log on main
}
if `do_excl' {
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_excl.smcl", replace name(do_excl)
di "* running exclusion analysis"
di "* check the distributions of the outliers"
di "* % excluded as too high"
tab year Econs_excl_hi
di "* % excluded as too low"
tab year Econs_excl_lo
di "* exclusion models"
local tvars "MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG"
foreach tv of local tvars {
di "* Checking Econs lowest 1% (+L) against `tv'"
tab `tv' u99_Econs , col
di "* Checking Econs highest 1% (+G) against `tv'"
tab `tv' u99_Econs , col
di "* Checking Econs L against `tv'"
tab `tv' Econs_excl_lo , col
di "* Checking Econs G against `tv'"
tab `tv' Econs_excl_hi , col
}
levelsof(year), local(years)
foreach y of local years {
di "* testing exclusions for `y'"
* use capture to avoid models failing where no exclusions (2010 ->)
capture noisily {
di "* Hi"
qui: logit Econs_excl_hi i.E7Flag2012 i.MAIN_HEAT_FUEL i.PROP_AGE ///
i.PROP_TYPE i.FLOOR_AREA_BAND i.EE_BAND ba_off_gas ///
if year == `y'
estat gof
est store Econs_excl_hi_`y'
}
capture noisily {
di "* Lo"
qui: logit Econs_excl_lo i.E7Flag2012 i.MAIN_HEAT_FUEL i.PROP_AGE ///
i.PROP_TYPE i.FLOOR_AREA_BAND i.EE_BAND ba_off_gas ///
if year == `y'
estat gof
est store Econs_excl_lo_`y'
}
}
estout Econs_excl_hi* using "$rpath/logit_Econs_excl_hi_`version'.txt", ///
cells("b se ci_u ci_l _star") ///
stats(r2_p ll N) replace
estout Econs_excl_lo* using "$rpath/logit_Econs_excl_lo_`version'.txt", ///
cells("b se ci_u ci_l _star") ///
stats(r2_p ll N) replace
log close do_excl
log on main
}
if `do_xsec' {
di "* Running cross sectional analysis using 2012 (all valid cases)"
* make sure test vars are destringed
local tvars "IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER"
foreach tv of local tvars {
destring `tv', force replace
}
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_xsec.smcl", replace name(do_xsec)
di "* Running cross sectional analysis for electricity using 2012 (all valid cases)"
* kitchen sink model - use 99% signif level
* use vce(robust) as running without & using hettest suggests heteroscedasticity
* regress would throw out collinear variables
......@@ -309,11 +375,11 @@ if `do_xsec' {
* Plot residuals
* pnormal plot of residuals
pnorm logEconsr, name(pnorm_logEconsr)
graph export "$rpath/graphs/pnorm_logEconsr.png", replace
graph export "$rpath/graphs/pnorm_logEconsr_2012.png", replace
* qnormal plot
qnorm logEconsr, name(qnorm_logEconsr)
graph export "$rpath/graphs/qnorm_logEconsr.png", replace
graph export "$rpath/graphs/qnorm_logEconsr_2012.png", replace
* test the null hypothesis that the variance of the residuals is homogenous.
* Therefore, if the p-value is very small, we would have to reject the hypothesis
......@@ -348,10 +414,21 @@ if `do_xsec' {
* test BIC etc
estat ic
* preduce margins plot for floor area
margins i.FLOOR_AREA_BAND
marginsplot
graph box Econs if year == 2012, over(FLOOR_AREA_BAND)
graph export "$rpath/graphs/box_Econs_floor_area_2012.png", replace
log close do_xsec
log on main
}
if `do_longit' {
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_longit.smcl", replace name(do_longit)
di "* Running longitudinal analysis"
di "* Check boiler transitions"
xttrans ba_have_BOILER, freq
......@@ -415,8 +492,10 @@ if `do_longit' {
est store xtr_re_logEcons
estout xtr_re_logEcons using "$rpath/xtr_re_logEcons_`version'.txt", cells("b se ci_u ci_l _star ") stats(r2_w r2_b r2_o rmse N sigma_u sigma_e, fmt(%9.3f %9.0g)) replace
}
log close do_longit
log on main
}
di "* Done!"
log close
log close main
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment