From 50bcdcdcd44d0010e508d6169aa8fabc42ab172e Mon Sep 17 00:00:00 2001
From: Ben Anderson <dataknut@users.noreply.github.com>
Date: Fri, 12 Sep 2014 09:49:19 +0100
Subject: [PATCH] added note & data source, updated to v1.1

v1.1: included only gas households to avoid confounding effects of potential electric & oil heating (we don't have oil consumption)
---
 analyse-NEED-EULF-2014.do | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/analyse-NEED-EULF-2014.do b/analyse-NEED-EULF-2014.do
index 0147368..7e81dac 100644
--- a/analyse-NEED-EULF-2014.do
+++ b/analyse-NEED-EULF-2014.do
@@ -1,8 +1,10 @@
-* Script to analyse DECC's NEED data
-* NB this script uses 2 files derived from the original data
+* Script to analyse DECC's NEED data to:
+* investigate the % of variance in energy consumption explained by dwelling type variables, as a way to infer the % of variance due to people
+
+* NB this script uses 2 data files derived from the original data using the 'process' script
 
 * Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
-*
+* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518
 
 * Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
 * b.anderson@soton.ac.uk
@@ -36,7 +38,17 @@ use "`dpath'/need_eul_may2014_consumptionfile_wide.dta", clear
 
 keep HH_ID *2012*
 
-* log the consumption
+* log the consumption as it's very skewed -> the logged values are semi-normal & so OK for linear regression
+* Gcons = gas
+* Econs = Electricity
+* Presumably those without gas use oil or electricity for heating - we don't have oil consumption data so we should probably restrict analysis to gas-using households only to avoid this confounding factor?
+
+* check what's valid
+tab Gcons2012Valid Econs2012Valid, mi // what does G,L,M mean? Presumably O = off gas?
+tabstat Gcons2012, by(Gcons2012Valid) s(mean min max n)
+
+keep if Gcons2012Valid == "V"
+
 gen log_Gcons2012 = log(Gcons2012)
 gen log_Econs2012 = log(Econs2012)
 
@@ -88,6 +100,7 @@ local pt104 "mid_terr"
 local pt105 "bung"
 local pt106 "flat"
 
+* now loop over the energy types
 local vars "Gcons Econs Allcons"
 foreach v of local vars {
 	* all hhs model
@@ -105,7 +118,7 @@ foreach v of local vars {
 	di "* http://www.ats.ucla.edu/stat/stata/webbooks/logistic/chapter3/statalog3.htm"
 	linktest	
 	
-	* by property type - to see if rsq & coefficients vary
+	* models by property type - to see if rsq & coefficients vary
 	foreach p of local ptypes {
 		di "* -> testing log_`v'2012 for `pt`p''"
 		qui: regress log_`v'2012 `generic_hvarsnp' ///
@@ -123,7 +136,7 @@ foreach v of local vars {
 		di "* http://www.ats.ucla.edu/stat/stata/webbooks/logistic/chapter3/statalog3.htm"
 		linktest	
 	}
-	* for different consumption quintiles - to see if rsq & coefficients vary
+	* models for different consumption quintiles - to see if rsq & coefficients vary
 	foreach q of numlist 0/4 {
 		di "* -> testing log_`v'2012 for quintile: `q'"
 		qui: regress log_`v'2012 `generic_hvars' ///
@@ -144,6 +157,8 @@ foreach v of local vars {
 }
 
 * output all the results - that's a lot of t tests!
+* we could write them all to one file but it would be really hard to find the ones you want!
+
 estout rlog_Gcons2012 using "`rpath'/NEED-EULF-2014-log-gas-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 estout rlog_Gcons2012q* using "`rpath'/NEED-EULF-2014-log-gas-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 estout rlog_Gcons2012_* using "`rpath'/NEED-EULF-2014-log-gas-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
@@ -153,3 +168,7 @@ estout rlog_Econs2012_* using "`rpath'/NEED-EULF-2014-log-elec-models-by-propert
 estout rlog_Allcons2012 using "`rpath'/NEED-EULF-2014-log-energy-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 estout rlog_Allcons2012q* using "`rpath'/NEED-EULF-2014-log-energy-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
 estout rlog_Allcons2012_* using "`rpath'/NEED-EULF-2014-log-energy-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
+
+di "* Done!"
+
+log close
-- 
GitLab