Skip to content
Snippets Groups Projects
Commit f94c500b authored by Ben Anderson's avatar Ben Anderson
Browse files

changed flow so each subsection has own log and added analysis of excluded cases

parent 53b1dd3f
Branches
No related tags found
No related merge requests found
...@@ -35,11 +35,8 @@ GNU General Public License for more details. ...@@ -35,11 +35,8 @@ GNU General Public License for more details.
* Requires: estout * Requires: estout
clear all clear all
capture noisily log close
set more off set more off
* written for Mac OSX - remember to change filesystem delimiter for other platforms * written for Mac OSX - remember to change filesystem delimiter for other platforms
...@@ -47,11 +44,16 @@ global home "~" ...@@ -47,11 +44,16 @@ global home "~"
global dpath "$home/Documents/Work/Data/Social Science Datasets/DECC/NEED/End User Licence File 2014/processed" global dpath "$home/Documents/Work/Data/Social Science Datasets/DECC/NEED/End User Licence File 2014/processed"
global rpath "$home/Dropbox/RSS-2015/results" *global rpath "$home/Dropbox/RSS-2015/NEED/results"
global rpath "$home/Documents/Work/Papers and Conferences/RSS-2015/NEED/results"
local version "v1" local version "v1" // version management via github
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace capture noisily log close _all
* start main log
* each subsection has own log
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_main.smcl", replace name(main)
* set sample * set sample
* 100 = 100pc * 100 = 100pc
...@@ -69,7 +71,8 @@ local need_weight = "[pw = WEIGHT]" // use probability weights ...@@ -69,7 +71,8 @@ local need_weight = "[pw = WEIGHT]" // use probability weights
local do_desc = 0 // do descriptives local do_desc = 0 // do descriptives
local do_histo = 0 // toggle graph drawing local do_histo = 0 // toggle graph drawing
local do_box = 0 // toggle graph drawing local do_box = 0 // toggle graph drawing
local do_xsec = 1 // run cross-sectional analysis local do_excl = 0 // tests for correlates with exclusion
local do_xsec = 0 // run cross-sectional analysis
local do_longit = 1 // run longitudinal analysis local do_longit = 1 // run longitudinal analysis
...@@ -111,13 +114,19 @@ lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,0 ...@@ -111,13 +114,19 @@ lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,0
* set up correct long form 'is X present in year' using year (if known) * set up correct long form 'is X present in year' using year (if known)
local vars "BOILER LI CWI" local vars "BOILER LI CWI"
* what will hapen if there are multiple replacements in a household * what will hapen if there are multiple replacements in a household?
foreach v of local vars { foreach v of local vars {
gen ba_have_`v' = 0 gen ba_have_`v' = 0
destring `v'_YEAR, force replace destring `v'_YEAR, force replace
replace ba_have_`v' = 1 if `v'_YEAR <= year replace ba_have_`v' = 1 if `v'_YEAR <= year
} }
* make sure test vars are destringed
local tvars "IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER"
foreach tv of local tvars {
destring `tv', force replace
}
* labels * labels
lab var Econs "Electricity (kWh/year)" lab var Econs "Electricity (kWh/year)"
lab var Gcons "Gas (kWh/year)" lab var Gcons "Gas (kWh/year)"
...@@ -140,6 +149,13 @@ foreach v of local setupvars { ...@@ -140,6 +149,13 @@ foreach v of local setupvars {
lab val `v'Validr `v'Validr lab val `v'Validr `v'Validr
tab `v'Validr `v'Valid tab `v'Validr `v'Valid
gen `v'_excl_hi = 0 if `v'Validr == 1 // valid
replace `v'_excl_hi = 1 if `v'Validr == 4 // high
gen `v'_excl_lo = 0 if `v'Validr == 1 // valid
replace `v'_excl_lo = 1 if `v'Validr == 3 // low
* set up consumption deciles and outlier flags * set up consumption deciles and outlier flags
gen u99_`v' = 0 gen u99_`v' = 0
gen l99_`v' = 0 gen l99_`v' = 0
...@@ -149,7 +165,7 @@ foreach v of local setupvars { ...@@ -149,7 +165,7 @@ foreach v of local setupvars {
levelsof(year), local(levels) levelsof(year), local(levels)
foreach l of local levels { foreach l of local levels {
di "* Calculating consumption deciles and outlier flags for `v' for `l'" di "* Calculating consumption deciles and outlier flags for `v' for `l'"
* creates missing for other years have to do this as egen does not allow by & can't 'replace' * creates missing for other years have to do this as egen does not allow by & can't 'replace'
di "* `v' deciles for `l'" di "* `v' deciles for `l'"
egen `v'_dec_`l' = cut(`v') if year == `l', group(10) egen `v'_dec_`l' = cut(`v') if year == `l', group(10)
...@@ -173,6 +189,7 @@ foreach v of local setupvars { ...@@ -173,6 +189,7 @@ foreach v of local setupvars {
*tab m90_`v' if year == `l', mi *tab m90_`v' if year == `l', mi
} }
* now combine the deciles - set missing option otherwise it counts a row where all are missing as 0 * now combine the deciles - set missing option otherwise it counts a row where all are missing as 0
egen `v'_dec = rowtotal(`v'_dec_*), missing egen `v'_dec = rowtotal(`v'_dec_*), missing
* remove temporary ones * remove temporary ones
...@@ -190,9 +207,6 @@ foreach v of local setupvars { ...@@ -190,9 +207,6 @@ foreach v of local setupvars {
lab var log`v' "Log `v'" lab var log`v' "Log `v'"
} }
* set 'survey' weight
svyset `need_weight'
* flag dwellings which are off gas for electricity * flag dwellings which are off gas for electricity
* NB - in this dataset we don't know if they use electricity as main heat (could be oil) * NB - in this dataset we don't know if they use electricity as main heat (could be oil)
gen ba_off_gas = 0 gen ba_off_gas = 0
...@@ -200,19 +214,25 @@ replace ba_off_gas = 1 if GconsValidr == 2 ...@@ -200,19 +214,25 @@ replace ba_off_gas = 1 if GconsValidr == 2
lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)" lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)"
lab val ba_off_gas ba_off_gas lab val ba_off_gas ba_off_gas
* check * add Gcons to loop over gas
svy: mean Gcons Econs, over(ba_off_gas) local descvars "Econs"
di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'" if `do_desc' {
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'-do_desc.smcl", replace name(do_desc)
* set 'survey' weight
svyset `need_weight'
* check
svy: mean Gcons Econs, over(ba_off_gas)
di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
table ba_off_gas MAIN_HEAT_FUEL `need_weight', missing // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'? table ba_off_gas MAIN_HEAT_FUEL `need_weight', missing // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas)
* roughly constant rate throughout years * roughly constant rate throughout years
table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) c(mean Gcons n Gcons) table year MAIN_HEAT_FUEL `need_weight', by(ba_off_gas) c(mean Gcons n Gcons)
* but off gas have no gas readings as you'd expect (DECC applied filter) * but off gas have no gas readings as you'd expect (DECC applied filter)
local descvars "Econs"
if `do_desc' {
foreach v of local descvars { foreach v of local descvars {
di "***************" di "***************"
di "* Testing `v' for `sample'% sample" di "* Testing `v' for `sample'% sample"
...@@ -268,26 +288,72 @@ if `do_desc' { ...@@ -268,26 +288,72 @@ if `do_desc' {
graph export "$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_yr_ee_valid.png", replace graph export "$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_yr_ee_valid.png", replace
} }
}
di "* check the distributions of the outliers" log close do_desc
local tvars "MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG" log on main
foreach tv of local tvars { }
di "* Checking top 1% against `tv'"
tab `tv' u99_Econs , col
}
if `do_excl' {
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_excl.smcl", replace name(do_excl)
di "* running exclusion analysis"
di "* check the distributions of the outliers"
di "* % excluded as too high"
tab year Econs_excl_hi
di "* % excluded as too low"
tab year Econs_excl_lo
di "* exclusion models"
local tvars "MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG"
foreach tv of local tvars {
di "* Checking Econs lowest 1% (+L) against `tv'"
tab `tv' u99_Econs , col
di "* Checking Econs highest 1% (+G) against `tv'"
tab `tv' u99_Econs , col
di "* Checking Econs L against `tv'"
tab `tv' Econs_excl_lo , col
di "* Checking Econs G against `tv'"
tab `tv' Econs_excl_hi , col
}
levelsof(year), local(years)
foreach y of local years {
di "* testing exclusions for `y'"
* use capture to avoid models failing where no exclusions (2010 ->)
capture noisily {
di "* Hi"
qui: logit Econs_excl_hi i.E7Flag2012 i.MAIN_HEAT_FUEL i.PROP_AGE ///
i.PROP_TYPE i.FLOOR_AREA_BAND i.EE_BAND ba_off_gas ///
if year == `y'
estat gof
est store Econs_excl_hi_`y'
}
capture noisily {
di "* Lo"
qui: logit Econs_excl_lo i.E7Flag2012 i.MAIN_HEAT_FUEL i.PROP_AGE ///
i.PROP_TYPE i.FLOOR_AREA_BAND i.EE_BAND ba_off_gas ///
if year == `y'
estat gof
est store Econs_excl_lo_`y'
}
} }
estout Econs_excl_hi* using "$rpath/logit_Econs_excl_hi_`version'.txt", ///
cells("b se ci_u ci_l _star") ///
stats(r2_p ll N) replace
estout Econs_excl_lo* using "$rpath/logit_Econs_excl_lo_`version'.txt", ///
cells("b se ci_u ci_l _star") ///
stats(r2_p ll N) replace
log close do_excl
log on main
} }
if `do_xsec' { if `do_xsec' {
di "* Running cross sectional analysis using 2012 (all valid cases)" log off main
* make sure test vars are destringed log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_xsec.smcl", replace name(do_xsec)
local tvars "IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER" di "* Running cross sectional analysis for electricity using 2012 (all valid cases)"
foreach tv of local tvars {
destring `tv', force replace
}
* kitchen sink model - use 99% signif level * kitchen sink model - use 99% signif level
* use vce(robust) as running without & using hettest suggests heteroscedasticity * use vce(robust) as running without & using hettest suggests heteroscedasticity
* regress would throw out collinear variables * regress would throw out collinear variables
...@@ -309,11 +375,11 @@ if `do_xsec' { ...@@ -309,11 +375,11 @@ if `do_xsec' {
* Plot residuals * Plot residuals
* pnormal plot of residuals * pnormal plot of residuals
pnorm logEconsr, name(pnorm_logEconsr) pnorm logEconsr, name(pnorm_logEconsr)
graph export "$rpath/graphs/pnorm_logEconsr.png", replace graph export "$rpath/graphs/pnorm_logEconsr_2012.png", replace
* qnormal plot * qnormal plot
qnorm logEconsr, name(qnorm_logEconsr) qnorm logEconsr, name(qnorm_logEconsr)
graph export "$rpath/graphs/qnorm_logEconsr.png", replace graph export "$rpath/graphs/qnorm_logEconsr_2012.png", replace
* test the null hypothesis that the variance of the residuals is homogenous. * test the null hypothesis that the variance of the residuals is homogenous.
* Therefore, if the p-value is very small, we would have to reject the hypothesis * Therefore, if the p-value is very small, we would have to reject the hypothesis
...@@ -348,10 +414,21 @@ if `do_xsec' { ...@@ -348,10 +414,21 @@ if `do_xsec' {
* test BIC etc * test BIC etc
estat ic estat ic
* preduce margins plot for floor area
margins i.FLOOR_AREA_BAND
marginsplot
graph box Econs if year == 2012, over(FLOOR_AREA_BAND)
graph export "$rpath/graphs/box_Econs_floor_area_2012.png", replace
log close do_xsec
log on main
} }
if `do_longit' { if `do_longit' {
log off main
log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_longit.smcl", replace name(do_longit)
di "* Running longitudinal analysis" di "* Running longitudinal analysis"
di "* Check boiler transitions" di "* Check boiler transitions"
xttrans ba_have_BOILER, freq xttrans ba_have_BOILER, freq
...@@ -415,8 +492,10 @@ if `do_longit' { ...@@ -415,8 +492,10 @@ if `do_longit' {
est store xtr_re_logEcons est store xtr_re_logEcons
estout xtr_re_logEcons using "$rpath/xtr_re_logEcons_`version'.txt", cells("b se ci_u ci_l _star ") stats(r2_w r2_b r2_o rmse N sigma_u sigma_e, fmt(%9.3f %9.0g)) replace estout xtr_re_logEcons using "$rpath/xtr_re_logEcons_`version'.txt", cells("b se ci_u ci_l _star ") stats(r2_w r2_b r2_o rmse N sigma_u sigma_e, fmt(%9.3f %9.0g)) replace
} }
log close do_longit
log on main
} }
di "* Done!" di "* Done!"
log close log close main
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment