Patch summary (free text before the first diff header; ignored by patch tools).
descriptives.do: defines local `sample' ("100pc") and sends BOTH the histogram and the box-plot PNG exports to `rpath'/graphs/.
models-v2.0.do: renames the version labels (v2_1pc / v2_10pc / v2_100pc), reads the sample-specific input files (*_`sample'pc.dta), moves graph exports to `rpath'/graphs/ and estout tables to `rpath'/models/ (dropping the $S_DATE suffix), fixes the "verrsion" typo in commented-out lines, and relabels pt105 as "bungalow".
NOTE(review): nothing visible in this patch creates `rpath'/graphs or `rpath'/models - confirm they exist before running.
NOTE(review): the box-plot export line was changed during review, so the post-image blob id on the first index line predates that change.
diff --git a/NEED/analyse-NEED-EULF-2014-descriptives.do b/NEED/analyse-NEED-EULF-2014-descriptives.do index 2b0b524b69214b2acc14d8a566c4b086bb043a53..f329ad2f9ee1498a17314bb88e4af3de496ed084 100644 --- a/NEED/analyse-NEED-EULF-2014-descriptives.do +++ b/NEED/analyse-NEED-EULF-2014-descriptives.do @@ -40,6 +40,8 @@ local dpath "`proot'/NEED/End User Licence File 2014/processed" local rpath "`proot'/results/NEED" local version "v1.1" +* set sample +local sample "100pc" * quick tests for 2012 local do_2012_desc = 0 @@ -65,6 +67,7 @@ if `do_2012_desc' { * processor intensive local vars "Econs2012 Gcons2012" local tvars "EE_BAND FLOOR_AREA_BAND PROP_AGE" + * test values for valid - check for valid 0s for example. This only happens for gas where: * 100 < gcons < 250 so included but rounded to nearest 500 = 0 @@ -80,7 +83,7 @@ if `do_2012_desc' { tab `v' if `v' < 1000 if `do_graphs' { histogram `v' if `v'Valid == "V", by(`tv') name(h_`tv'_`v'_`sample') - graph export "`rpath'/NEED-EULF-2014-histo_`v'_by_`tv'_`sample'_valid.png", replace + graph export "`rpath'/graphs/NEED-EULF-2014-histo_`v'_by_`tv'_`sample'_valid.png", replace graph box `v' if `v'Valid == "V", over(`tv') name(b_`tv'_`v'_`sample') - graph export "`rpath'/NEED-EULF-2014-box_`v'_by_`tv'_`sample'_valid.png", replace + graph export "`rpath'/graphs/NEED-EULF-2014-box_`v'_by_`tv'_`sample'_valid.png", replace diff --git a/NEED/analyse-NEED-EULF-2014-models-v2.0.do b/NEED/analyse-NEED-EULF-2014-models-v2.0.do index e656ad58b2b0a61674a0a50806c904ae02cefa63..164c37769a260a7f256e0a46069eec634d6b789b 100644 --- a/NEED/analyse-NEED-EULF-2014-models-v2.0.do +++ b/NEED/analyse-NEED-EULF-2014-models-v2.0.do @@ -39,28 +39,28 @@ local proot "`home'/Work/Data/Social Science Datatsets/DECC" local dpath "`proot'/NEED/End User Licence File 2014/processed" local rpath "`proot'/results/NEED" -*local verrsion "1.0" +*local version "1.0" * initial models - all households for electricity models -*local verrsion "1.1" +*local version "1.1" * restrict to gas only households to avoid complications of: * - primary electric 
heating (presumably) * - oil heating -*local version "v2a_1pc" +*local version "v2_1pc" *local sample 1 *local sampleby "EE_BAND PROP_TYPE" * changed from using log consumption to consumption decile to avoid complications due to variable rounding ranges in original data (see readme) * restricted analysis to households where gas is main heat source as it is better predicted by variables included & is more relevant to EPC (heat) * uses 1% sample (c 30k) making sure keep proportions of property type and EE_Band to see if linktest fails with smaller n -*local version "v2b_10pc" +*local version "v2_10pc" *local sample 10 *local sampleby "EE_BAND PROP_TYPE" * uses 10% sample (c 300k) making sure keep proportions of property type and EE_Band to see if margin plots and co-efficients are the same * (linktest etc will probably now fail due to larger n) -local version "v2c_full" +local version "v2_100pc" local sample 100 local sampleby "EE_BAND PROP_TYPE" * uses full sample (c 3m) to see if margin plots and co-efficients are the same @@ -71,14 +71,14 @@ set more off log using "`rpath'/analyse-NEED-EULF-2014-models-`version'-$S_DATE.smcl", replace * use the pre-processed wide form file which contains all years of consumption data but not the constant values which are in the xwave file -use "`dpath'/need_eul_may2014_consumptionfile_wide.dta", clear +use "`dpath'/need_eul_may2014_consumptionfile_wide_`sample'pc.dta", clear * we're going to use 2012 data only keep HH_ID *2012* * merge in the pre-processed cross-year fixed values file -merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile.dta" +merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile_`sample'pc.dta" * check what's valid tab Gcons2012Valid Econs2012Valid, mi // O = off gas, V = valid, L = too low, G = too big, M = missing @@ -87,7 +87,7 @@ tabstat Gcons2012, by(Gcons2012Valid) s(mean min max n) tabstat Econs2012, by(Gcons2012Valid) s(mean min max n) histogram Gcons2012, by(MAIN_HEAT_FUEL, total) 
name(histo_Gcons2012) -graph export "`rpath'/histo_Gcons2012_by_main_heating_fuel.png", replace +graph export "`rpath'/graphs/histo_Gcons2012_by_main_heating_fuel_`version'.png", replace tabstat Gcons2012, by(MAIN_HEAT_FUEL) s(n mean min max) @@ -157,7 +157,7 @@ local pt101 "detached" local pt102 "semi" local pt103 "end_terr" local pt104 "mid_terr" -local pt105 "bung" +local pt105 "bungalow" local pt106 "flat" * now loop over the energy types & run linear regression models @@ -190,7 +190,7 @@ foreach v of local vars { di "* test EPC margins for `v'" margins EE_BAND marginsplot, name(mplot_`v'_EE_BAND) - graph export "`rpath'/mplot_`v'_EE_BAND-`version'.png", replace + graph export "`rpath'/graphs/mplot_`v'_EE_BAND-`version'.png", replace * models by property type - to see if rsq & coefficients vary foreach p of local ptypes { @@ -213,7 +213,7 @@ foreach v of local vars { di "* test EPC margins for `v' (`pt`p'')" margins EE_BAND marginsplot, name(mplot_`v'_EE_BAND_`pt`p'') - graph export "`rpath'/mplot_`v'_EE_BAND_`pt`p''-`version'.png", replace + graph export "`rpath'/graphs/mplot_`v'_EE_BAND_`pt`p''-`version'.png", replace } * models for different consumption quintiles - to see if rsq & coefficients vary @@ -241,11 +241,11 @@ foreach v of local vars { * output all the results - that's a lot of t tests! * we could put them all out in one file but it would be really hard to find the ones you want! 
-estout lg2012 using "`rpath'/NEED-EULF-2014-log-gas-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout lg2012_* using "`rpath'/NEED-EULF-2014-log-gas-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) +estout lg2012 using "`rpath'/models/NEED-EULF-2014-log-gas-model-`version'.txt", replace cells("b se p _star") stats(r2 r2_a N ll) +estout lg2012_* using "`rpath'/models/NEED-EULF-2014-log-gas-models-by-property-type-`version'.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout g2012dec using "`rpath'/NEED-EULF-2014-gas-deciles-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) -estout g2012dec_* using "`rpath'/NEED-EULF-2014-gas-deciles-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll) +estout g2012dec using "`rpath'/models/NEED-EULF-2014-gas-deciles-model-`version'.txt", replace cells("b se p _star") stats(r2 r2_a N ll) +estout g2012dec_* using "`rpath'/models/NEED-EULF-2014-gas-deciles-models-by-property-type-`version'.txt", replace cells("b se p _star") stats(r2 r2_a N ll) di "* Done!"