Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
DECC-data
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Ben Anderson
DECC-data
Commits
f94c500b
Commit
f94c500b
authored
9 years ago
by
Ben Anderson
Browse files
Options
Downloads
Patches
Plain Diff
changed flow so each subsection has own log and added analysis of excluded cases
parent
53b1dd3f
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
NEED/analyse-NEED-EULF-2014-electricity-consumption.do
+118
-39
118 additions, 39 deletions
NEED/analyse-NEED-EULF-2014-electricity-consumption.do
with
118 additions
and
39 deletions
NEED/analyse-NEED-EULF-2014-electricity-consumption.do
+
118
−
39
View file @
f94c500b
...
@@ -35,11 +35,8 @@ GNU General Public License for more details.
...
@@ -35,11 +35,8 @@ GNU General Public License for more details.
* Requires: estout
* Requires: estout
clear
all
clear
all
capture
noisily
log
close
set
more
off
set
more
off
* written for Mac OSX - remember to change filesystem delimiter for other platforms
* written for Mac OSX - remember to change filesystem delimiter for other platforms
...
@@ -47,11 +44,16 @@ global home "~"
...
@@ -47,11 +44,16 @@ global home "~"
global
dpath
"$home/Documents/Work/Data/Social Science Datasets/DECC/NEED/End User Licence File 2014/processed"
global
dpath
"$home/Documents/Work/Data/Social Science Datasets/DECC/NEED/End User Licence File 2014/processed"
global
rpath
"$home/Dropbox/RSS-2015/results"
*global rpath "$home/Dropbox/RSS-2015/NEED/results"
global
rpath
"$home/Documents/Work/Papers and Conferences/RSS-2015/NEED/results"
local
version
"v1"
local
version
"v1"
// version management via github
log
using
"$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl"
,
replace
capture
noisily
log
close
_all
* start main log
* each subsection has own log
log
using
"$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_main.smcl"
,
replace
name
(
main
)
* set sample
* set sample
* 100 = 100pc
* 100 = 100pc
...
@@ -69,7 +71,8 @@ local need_weight = "[pw = WEIGHT]" // use probability weights
...
@@ -69,7 +71,8 @@ local need_weight = "[pw = WEIGHT]" // use probability weights
local
do_desc
=
0
// do descriptives
local
do_desc
=
0
// do descriptives
local
do_histo
=
0
// toggle graph drawing
local
do_histo
=
0
// toggle graph drawing
local
do_box
=
0
// toggle graph drawing
local
do_box
=
0
// toggle graph drawing
local
do_xsec
=
1
// run cross-sectional analysis
local
do_excl
=
0
// tests for correlates with exclusion
local
do_xsec
=
0
// run cross-sectional analysis
local
do_longit
=
1
// run longitudinal analysis
local
do_longit
=
1
// run longitudinal analysis
...
@@ -111,13 +114,19 @@ lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,0
...
@@ -111,13 +114,19 @@ lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,0
* set up correct long form 'is X present in year' using year (if known)
* set up correct long form 'is X present in year' using year (if known)
local
vars
"BOILER LI CWI"
local
vars
"BOILER LI CWI"
* what will happen if there are multiple replacements in a household
* what will happen if there are multiple replacements in a household
?
foreach
v
of
local
vars
{
foreach
v
of
local
vars
{
gen
ba_have_
`v'
=
0
gen
ba_have_
`v'
=
0
destring
`v'
_YEAR
,
force
replace
destring
`v'
_YEAR
,
force
replace
replace
ba_have_
`v'
=
1
if
`v'
_YEAR
<=
year
replace
ba_have_
`v'
=
1
if
`v'
_YEAR
<=
year
}
}
* make sure test vars are destringed
local
tvars
"IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER"
foreach
tv
of
local
tvars
{
destring
`tv'
,
force
replace
}
* labels
* labels
lab
var
Econs
"Electricity (kWh/year)"
lab
var
Econs
"Electricity (kWh/year)"
lab
var
Gcons
"Gas (kWh/year)"
lab
var
Gcons
"Gas (kWh/year)"
...
@@ -140,6 +149,13 @@ foreach v of local setupvars {
...
@@ -140,6 +149,13 @@ foreach v of local setupvars {
lab
val
`v'
Validr
`v'
Validr
lab
val
`v'
Validr
`v'
Validr
tab
`v'
Validr
`v'
Valid
tab
`v'
Validr
`v'
Valid
gen
`v'
_excl_hi
=
0
if
`v'
Validr
==
1
// valid
replace
`v'
_excl_hi
=
1
if
`v'
Validr
==
4
// high
gen
`v'
_excl_lo
=
0
if
`v'
Validr
==
1
// valid
replace
`v'
_excl_lo
=
1
if
`v'
Validr
==
3
// low
* set up consumption deciles and outlier flags
* set up consumption deciles and outlier flags
gen
u99_
`v'
=
0
gen
u99_
`v'
=
0
gen
l99_
`v'
=
0
gen
l99_
`v'
=
0
...
@@ -149,7 +165,7 @@ foreach v of local setupvars {
...
@@ -149,7 +165,7 @@ foreach v of local setupvars {
levelsof
(
year
),
local
(
levels
)
levelsof
(
year
),
local
(
levels
)
foreach
l
of
local
levels
{
foreach
l
of
local
levels
{
di
"* Calculating consumption deciles and outlier flags for `v' for `l'"
di
"* Calculating consumption deciles and outlier flags for `v' for `l'"
* creates missing for other years - have to do this as egen does not allow by & can't 'replace'
* creates missing for other years - have to do this as egen does not allow by & can't 'replace'
di
"* `v' deciles for `l'"
di
"* `v' deciles for `l'"
egen
`v'
_dec_
`l'
=
cut
(
`v'
)
if
year
==
`l'
,
group
(
10
)
egen
`v'
_dec_
`l'
=
cut
(
`v'
)
if
year
==
`l'
,
group
(
10
)
...
@@ -173,6 +189,7 @@ foreach v of local setupvars {
...
@@ -173,6 +189,7 @@ foreach v of local setupvars {
*tab m90_`v' if year == `l', mi
*tab m90_`v' if year == `l', mi
}
}
* now combine the deciles - set missing option otherwise it counts a row where all are missing as 0
* now combine the deciles - set missing option otherwise it counts a row where all are missing as 0
egen
`v'
_dec
=
rowtotal
(
`v'
_dec_
*
),
missing
egen
`v'
_dec
=
rowtotal
(
`v'
_dec_
*
),
missing
* remove temporary ones
* remove temporary ones
...
@@ -190,9 +207,6 @@ foreach v of local setupvars {
...
@@ -190,9 +207,6 @@ foreach v of local setupvars {
lab
var
log
`v'
"Log `v'"
lab
var
log
`v'
"Log `v'"
}
}
* set 'survey' weight
svyset
`need_weight'
* flag dwellings which are off gas for electricity
* flag dwellings which are off gas for electricity
* NB - in this dataset we don't know if they use electricity as main heat (could be oil)
* NB - in this dataset we don't know if they use electricity as main heat (could be oil)
gen
ba_off_gas
=
0
gen
ba_off_gas
=
0
...
@@ -200,19 +214,25 @@ replace ba_off_gas = 1 if GconsValidr == 2
...
@@ -200,19 +214,25 @@ replace ba_off_gas = 1 if GconsValidr == 2
lab
def
ba_off_gas
0
"On gas (GconsValid!=O)"
1
"Off gas (GconsValid=O, from EPC)"
lab
def
ba_off_gas
0
"On gas (GconsValid!=O)"
1
"Off gas (GconsValid=O, from EPC)"
lab
val
ba_off_gas
ba_off_gas
lab
val
ba_off_gas
ba_off_gas
* check
* add Gcons to loop over gas
svy
:
mean
Gcons
Econs
,
over
(
ba_off_gas
)
local
descvars
"Econs"
di
"* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
if
`do_desc'
{
log
off
main
log
using
"$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'-do_desc.smcl"
,
replace
name
(
do_desc
)
* set 'survey' weight
svyset
`need_weight'
* check
svy
:
mean
Gcons
Econs
,
over
(
ba_off_gas
)
di
"* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'"
table
ba_off_gas
MAIN_HEAT_FUEL
`need_weight'
,
missing
// suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
table
ba_off_gas
MAIN_HEAT_FUEL
`need_weight'
,
missing
// suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'?
table
year
MAIN_HEAT_FUEL
`need_weight'
,
by
(
ba_off_gas
)
table
year
MAIN_HEAT_FUEL
`need_weight'
,
by
(
ba_off_gas
)
* roughly constant rate throughout years
* roughly constant rate throughout years
table
year
MAIN_HEAT_FUEL
`need_weight'
,
by
(
ba_off_gas
)
c
(
mean
Gcons
n
Gcons
)
table
year
MAIN_HEAT_FUEL
`need_weight'
,
by
(
ba_off_gas
)
c
(
mean
Gcons
n
Gcons
)
* but off gas have no gas readings as you'd expect (DECC applied filter)
* but off gas have no gas readings as you'd expect (DECC applied filter)
local
descvars
"Econs"
if
`do_desc'
{
foreach
v
of
local
descvars
{
foreach
v
of
local
descvars
{
di
"***************"
di
"***************"
di
"* Testing `v' for `sample'% sample"
di
"* Testing `v' for `sample'% sample"
...
@@ -268,26 +288,72 @@ if `do_desc' {
...
@@ -268,26 +288,72 @@ if `do_desc' {
graph
export
"$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_yr_ee_valid.png"
,
replace
graph
export
"$rpath/graphs/NEED-EULF-2014-`sample'pc-box_`v'_yr_ee_valid.png"
,
replace
}
}
}
di
"* check the distributions of the outliers"
log
close
do_desc
local
tvars
"MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG"
log
on
main
foreach
tv
of
local
tvars
{
}
di
"* Checking top 1% against `tv'"
tab
`tv'
u99_Econs
,
col
}
if
`do_excl'
{
log
off
main
log
using
"$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_excl.smcl"
,
replace
name
(
do_excl
)
di
"* running exclusion analysis"
di
"* check the distributions of the outliers"
di
"* % excluded as too high"
tab
year
Econs_excl_hi
di
"* % excluded as too low"
tab
year
Econs_excl_lo
di
"* exclusion models"
local
tvars
"MAIN_HEAT_FUEL E7Flag2012 ba_off_gas FLOOR_AREA_BAND EE_BAND IMD_ENG"
foreach
tv
of
local
tvars
{
di
"* Checking Econs lowest 1% (+L) against `tv'"
tab
`tv'
u99_Econs
,
col
di
"* Checking Econs highest 1% (+G) against `tv'"
tab
`tv'
u99_Econs
,
col
di
"* Checking Econs L against `tv'"
tab
`tv'
Econs_excl_lo
,
col
di
"* Checking Econs G against `tv'"
tab
`tv'
Econs_excl_hi
,
col
}
levelsof
(
year
),
local
(
years
)
foreach
y
of
local
years
{
di
"* testing exclusions for `y'"
* use capture to avoid models failing where no exclusions (2010 ->)
capture
noisily
{
di
"* Hi"
qui
:
logit
Econs_excl_hi
i.
E7Flag2012
i.
MAIN_HEAT_FUEL
i.
PROP_AGE
///
i.
PROP_TYPE
i.
FLOOR_AREA_BAND
i.
EE_BAND
ba_off_gas
///
if
year
==
`y'
estat
gof
est
store
Econs_excl_hi_
`y'
}
capture
noisily
{
di
"* Lo"
qui
:
logit
Econs_excl_lo
i.
E7Flag2012
i.
MAIN_HEAT_FUEL
i.
PROP_AGE
///
i.
PROP_TYPE
i.
FLOOR_AREA_BAND
i.
EE_BAND
ba_off_gas
///
if
year
==
`y'
estat
gof
est
store
Econs_excl_lo_
`y'
}
}
}
estout
Econs_excl_hi
*
using
"$rpath/logit_Econs_excl_hi_`version'.txt"
,
///
cells
(
"b se ci_u ci_l _star"
)
///
stats
(
r2_p
ll
N
)
replace
estout
Econs_excl_lo
*
using
"$rpath/logit_Econs_excl_lo_`version'.txt"
,
///
cells
(
"b se ci_u ci_l _star"
)
///
stats
(
r2_p
ll
N
)
replace
log
close
do_excl
log
on
main
}
}
if
`do_xsec'
{
if
`do_xsec'
{
di
"* Running cross sectional analysis using 2012 (all valid cases)"
log
off
main
* make sure test vars are destringed
log
using
"$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_xsec.smcl"
,
replace
name
(
do_xsec
)
local
tvars
"IMD_ENG FP_ENG E7Flag2012 MAIN_HEAT_FUEL PROP_AGE PROP_TYPE FLOOR_AREA_BAND EE_BAND LOFT_DEPTH WALL_CONS CWI BOILER"
di
"* Running cross sectional analysis for electricity using 2012 (all valid cases)"
foreach
tv
of
local
tvars
{
destring
`tv'
,
force
replace
}
* kitchen sink model - use 99% signif level
* kitchen sink model - use 99% signif level
* use vce(robust) as running without & using hettest suggests heteroscedasticity
* use vce(robust) as running without & using hettest suggests heteroscedasticity
* regress would throw out collinear variables
* regress would throw out collinear variables
...
@@ -309,11 +375,11 @@ if `do_xsec' {
...
@@ -309,11 +375,11 @@ if `do_xsec' {
* Plot residuals
* Plot residuals
* pnormal plot of residuals
* pnormal plot of residuals
pnorm
logEconsr
,
name
(
pnorm_logEconsr
)
pnorm
logEconsr
,
name
(
pnorm_logEconsr
)
graph
export
"$rpath/graphs/pnorm_logEconsr.png"
,
replace
graph
export
"$rpath/graphs/pnorm_logEconsr
_2012
.png"
,
replace
* qnormal plot
* qnormal plot
qnorm
logEconsr
,
name
(
qnorm_logEconsr
)
qnorm
logEconsr
,
name
(
qnorm_logEconsr
)
graph
export
"$rpath/graphs/qnorm_logEconsr.png"
,
replace
graph
export
"$rpath/graphs/qnorm_logEconsr
_2012
.png"
,
replace
* test the null hypothesis that the variance of the residuals is homogeneous.
* test the null hypothesis that the variance of the residuals is homogeneous.
* Therefore, if the p-value is very small, we would have to reject the hypothesis
* Therefore, if the p-value is very small, we would have to reject the hypothesis
...
@@ -348,10 +414,21 @@ if `do_xsec' {
...
@@ -348,10 +414,21 @@ if `do_xsec' {
* test BIC etc
* test BIC etc
estat
ic
estat
ic
* produce margins plot for floor area
margins
i.
FLOOR_AREA_BAND
marginsplot
graph
box
Econs
if
year
==
2012
,
over
(
FLOOR_AREA_BAND
)
graph
export
"$rpath/graphs/box_Econs_floor_area_2012.png"
,
replace
log
close
do_xsec
log
on
main
}
}
if
`do_longit'
{
if
`do_longit'
{
log
off
main
log
using
"$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'_do_longit.smcl"
,
replace
name
(
do_longit
)
di
"* Running longitudinal analysis"
di
"* Running longitudinal analysis"
di
"* Check boiler transitions"
di
"* Check boiler transitions"
xttrans
ba_have_BOILER
,
freq
xttrans
ba_have_BOILER
,
freq
...
@@ -415,8 +492,10 @@ if `do_longit' {
...
@@ -415,8 +492,10 @@ if `do_longit' {
est
store
xtr_re_logEcons
est
store
xtr_re_logEcons
estout
xtr_re_logEcons
using
"$rpath/xtr_re_logEcons_`version'.txt"
,
cells
(
"b se ci_u ci_l _star "
)
stats
(
r2_w
r2_b
r2_o
rmse
N
sigma_u
sigma_e
,
fmt
(
%9.3f
%9.0g))
replace
estout
xtr_re_logEcons
using
"$rpath/xtr_re_logEcons_`version'.txt"
,
cells
(
"b se ci_u ci_l _star "
)
stats
(
r2_w
r2_b
r2_o
rmse
N
sigma_u
sigma_e
,
fmt
(
%9.3f
%9.0g))
replace
}
}
log
close
do_longit
log
on
main
}
}
di
"* Done!"
di
"* Done!"
log
close
log
close
main
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment