diff --git a/LSOA-Domestic/extract-DECC-LSOA-energy-data-from-excel.do b/LSOA-Domestic/extract-DECC-LSOA-energy-data-from-excel.do index 840029d64b98134bb3ff023b5451dc649ec335e4..4ab13d9d837d92892a84c632a01f8f215b301804 100644 --- a/LSOA-Domestic/extract-DECC-LSOA-energy-data-from-excel.do +++ b/LSOA-Domestic/extract-DECC-LSOA-energy-data-from-excel.do @@ -1,27 +1,27 @@ -******************************************* -* Script to: +******************************************* +* Script to: * - Extract data from DECC LSOA level domestic energy consumption data (https://www.gov.uk/government/collections/mlsoa-and-llsoa-electricity-and-gas-estimates) -/* - -Copyright (C) 2014 University of Southampton - -Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) - [Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton] - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License -(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -#YMMV - http://en.wiktionary.org/wiki/YMMV - -*/ +/* + +Copyright (C) 2014 University of Southampton + +Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) + [Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton] + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License +(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +#YMMV - http://en.wiktionary.org/wiki/YMMV + +*/ clear all @@ -29,7 +29,7 @@ capture log close set more off -local where = "/Users/ben/Documents" +local where = "~/Documents" local projroot = "`where'/Work/Data/Social Science Datatsets/DECC" local dpath = "`projroot'/LSOA Energy Data" @@ -37,7 +37,7 @@ local dofiles = "`projroot'/do_files" local rpath = "`dpath'/processed" * census data location -local c2001path = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data" +local c2001path = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data" local c2011path = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data" local eimdpath = "`where'/Work/Data/Social Science Datatsets/Indices of Deprivation/English ID 2010" local pcluts = "`where'/Work/Data/GIS data/UK Postcodes/pcluts_2007nov_processed" @@ -52,7 +52,7 @@ local do_2008 = 0 local do_2009 = 0 local do_2010 = 0 local do_matrix_graphs = 0 -local do_bar_graphs = 1 +local do_bar_graphs = 0 @@ -330,14 +330,18 @@ foreach dv of local dist_testvars { tabstat *Meters_*_delptc_ppn , by(`dv') } +gen lsoacode_2001 = ew_lsoacode preserve - keep zonecode regioncode regionname countycode countyname districtcode districtname *200* *dif* + keep zonecode lsoacode_2001 regioncode regionname countycode countyname districtcode districtname *200* *dif* outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo.csv", comma replace outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo-southwest.csv" if regionname == "South West", comma replace export excel using "`dpath'/processed/llsoa-electricity-all-years-England-geo.xls", sheet("data") firstrow(variables) replace restore + + + save 
"`dpath'/processed/llsoa-electricity-all-years-England-geo.dta", replace diff --git a/NEED/analyse-NEED-EULF-2014-descriptives.do b/NEED/analyse-NEED-EULF-2014-descriptives.do index c8e0ea9fd20226b5231fdd9866eb616dd0c23069..bc28084e062bbbc599d2e9cf55a10f69d58e1f24 100644 --- a/NEED/analyse-NEED-EULF-2014-descriptives.do +++ b/NEED/analyse-NEED-EULF-2014-descriptives.do @@ -32,7 +32,7 @@ clear all capture noisily log close * written for Mac OSX - remember to change filesystem delimiter for other platforms -local home "/Users/ben/Documents" +local home "~/Documents" local proot "`home'/Work/Data/Social Science Datatsets/DECC" * for clam * local proot "`home'/Work/NEED" diff --git a/NEED/analyse-NEED-EULF-2014-electricity-consumption.do b/NEED/analyse-NEED-EULF-2014-electricity-consumption.do index 1e05ac6ecf2bb649b3cb64284936c2ff7fe8f917..08c4100e1aed67579937d521d66b4e1f6832fd91 100644 --- a/NEED/analyse-NEED-EULF-2014-electricity-consumption.do +++ b/NEED/analyse-NEED-EULF-2014-electricity-consumption.do @@ -37,7 +37,7 @@ capture noisily log close set more off * written for Mac OSX - remember to change filesystem delimiter for other platforms -global home "/Users/ben/Documents" +global home "~/Documents" global dpath "$home/Work/Data/Social Science Datatsets/DECC/NEED/End User Licence File 2014/processed" global rpath "$home/Work/Papers and Conferences/RSS-2015/results" @@ -62,30 +62,46 @@ lab def EconsValidr 1 "(V)alid" 2 "not set" 3 "(L)Elec < 100" 4 "(G) Elec > 25,0 * also be aware that the consumption is rounded in buckets: /* -GconsYEAR . Missing, off gas or invalid consumption 100 � 7,999 Gas consumption kWh rounded to nearest 500 kWh 8,000- 15,999 Gas consumption kWh rounded to nearest 100 kWh 16,000 � 24,999 Gas consumption kWh rounded to nearest 500 kWh 25,000 � 34,999 Gas consumption kWh rounded to nearest 1,000 kWh 35,000 � 50,000 Gas consumption kWh rounded to nearest 5,000 kWh EconsYEAR . 
Missing or invalid consumption 100 - 9,999 Electricity consumption kWh rounded to nearest 50 kWh 10,000 - 11,999 Electricity consumption kWh rounded to nearest 100 kWh 12,000 - 14,999 Electricity consumption kWh rounded to nearest 500 kWh 15,000 - 19,999 Electricity consumption kWh rounded to nearest 1,000 kWh 20,000 - 25,000 Electricity consumption kWh rounded to nearest 5,000 kWh +GconsYEAR . Missing, off gas or invalid consumption + 100 - 7,999 Gas consumption kWh rounded to nearest 500 kWh + 8,000 - 15,999 Gas consumption kWh rounded to nearest 100 kWh + 16,000 - 24,999 Gas consumption kWh rounded to nearest 500 kWh + 25,000 - 34,999 Gas consumption kWh rounded to nearest 1,000 kWh + 35,000 - 50,000 Gas consumption kWh rounded to nearest 5,000 kWh + + +EconsYEAR . Missing or invalid consumption + 100 - 9,999 Electricity consumption kWh rounded to nearest 50 kWh + 10,000 - 11,999 Electricity consumption kWh rounded to nearest 100 kWh + 12,000 - 14,999 Electricity consumption kWh rounded to nearest 500 kWh + 15,000 - 19,999 Electricity consumption kWh rounded to nearest 1,000 kWh + 20,000 - 25,000 Electricity consumption kWh rounded to nearest 5,000 kWh set more off */ log using "$rpath/analyse-NEED-EULF-2014-electricity-consumption-`version'.smcl", replace -if `do_desc' { - di "************************" - di "* Using `sample'% sample" - * load the yearly consumption data - use "$dpath/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear - * merge in the xwave file (fixed data - we assume!) - merge m:1 HH_ID using "$dpath/need_eul_may2014_xwavefile_100pc.dta" +di "************************" +di "* Using `sample'% sample" +* load the yearly consumption data +use "$dpath/need_eul_may2014_consumptionfile_long_`sample'pc.dta", clear - * set as panel in case it wasn't - * fix format of year so xtset doesn't break - format year %ty - xtset HH_ID year, delta(1 year) +* merge in the xwave file (fixed data - we assume!) 
+merge m:1 HH_ID using "$dpath/need_eul_may2014_xwavefile_100pc.dta" - * examine panel status - xtdescribe +lab var Econs "Electricity (KwH/year)" +lab var Gcons "Gas (KwH/year)" - * set up +* set as panel in case it wasn't +* fix format of year so xtset doesn't break +format year %ty +xtset HH_ID year, delta(1 year) + +* examine panel status +xtdescribe + +* set up local vars "Econs Gcons" foreach v of local vars { di "***************" @@ -101,6 +117,9 @@ foreach v of local vars { lab var `v'Validr "Recoded `v'Valid" lab val `v'Validr `v'Validr + di "* Check transitions (`v'Validr)" + xttrans `v'Validr, freq + * set up consumption deciles levelsof(year), local(levels) foreach l of local levels { @@ -116,60 +135,57 @@ foreach v of local vars { tab `v'_dec year } -stop - * flag dwellings which are off gas for electricity - * NB - in this dataset we don't know if they use electricity as main heat (could be oil) - gen ba_off_gas = 0 - replace ba_off_gas = 1 if GconsValidr == 2 - lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)" - lab val ba_off_gas ba_off_gas - * check - tabstat Gcons Econs, by(ba_off_gas) - di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'" +* flag dwellings which are off gas for electricity +* NB - in this dataset we don't know if they use electricity as main heat (could be oil) +gen ba_off_gas = 0 +replace ba_off_gas = 1 if GconsValidr == 2 +lab def ba_off_gas 0 "On gas (GconsValid!=O)" 1 "Off gas (GconsValid=O, from EPC)" +lab val ba_off_gas ba_off_gas - tab ba_off_gas MAIN_HEAT_FUEL, mi // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'? 
- table year MAIN_HEAT_FUEL, by(ba_off_gas) - * roughly constant rate throughout years - table year MAIN_HEAT_FUEL, by(ba_off_gas) c(mean Gcons n Gcons) - * but off gas have no gas readings as you'd expect (DECC filter) +* check +tabstat Gcons Econs, by(ba_off_gas) +di "* MAIN_HEAT_FUEL - Description of main heating fuel (gas or other). EPC - but NB could be 'other' but still be 'on gas'" - foreach v of local vars { - di "***************" - di "* Testing `v' for `sample'% sample" +tab ba_off_gas MAIN_HEAT_FUEL, mi // suggests EPC says 'off gas' (via GconsValid) but main heat fuel still says 'gas'? - * overall - xtsum `v' if `v'Valid == "V" - * test values for valid - check for valid 0s for example. This only happens for gas where: - * 100 < gcons < 250 so included but rounded to nearest 500 = 0 +table year MAIN_HEAT_FUEL, by(ba_off_gas) +* roughly constant rate throughout years +table year MAIN_HEAT_FUEL, by(ba_off_gas) c(mean Gcons n Gcons) +* but off gas have no gas readings as you'd expect (DECC filter) - * elec always rounded to nearest 50 so min should always be 100 +foreach v of local vars { + di "***************" + di "* Testing `v' for `sample'% sample" - tabstat `v', by(`v'Valid) s(n mean semean min max) - * by year - di "* check `v' for 0s (`s'% sample)" - table `v' year if `v' < 1000 - table `v'Valid year, c(count `v' min `v' mean `v' max `v') + * overall + xtsum `v' if `v'Valid == "V" + * test values for valid - check for valid 0s for example. 
This only happens for gas where: + * 100 < gcons < 250 so included but rounded to nearest 500 = 0 - if `do_graphs' { - di "* Running graphs - do not keep in memory, just save out" - di "* Running graphs: histo" - histogram `v' if `v'Valid == "V", by(year) - graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace + * elec always rounded to nearest 50 so min should always be 100 - di "* Running graphs: boxes" - graph box `v' if `v'Valid == "V", over(year) - graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_over_year_valid.png", replace + tabstat `v', by(`v'Valid) s(n mean semean min max) + * by year + di "* check `v' for 0s (`s'% sample)" /* NOTE(review): local macro s is not defined in the visible hunk, while neighbouring lines use sample; an undefined Stata local expands to an empty string - confirm s is set earlier in the file. The graph export filenames below depend on it too. */ + table `v' year if `v' < 1000 + table `v'Valid year, c(count `v' min `v' mean `v' max `v') - graph box `v' if `v'Valid == "V", over(year) by(FLOOR_AREA_BAND) - graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_floor_valid.png", replace + if `do_graphs' { + di "* Running graphs - do not keep in memory, just save out" + di "* Running graphs: histo" + histogram `v' if `v'Valid == "V", by(year) scale(0.75) + graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-histo_`v'_by_year_valid.png", replace - graph box `v' if `v'Valid == "V", over(year) by(EE_BAND) - graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_ee_valid.png", replace + di "* Running graphs: boxes" + graph box `v' if `v'Valid == "V", over(year) scale(0.75) + graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_over_year_valid.png", replace - } + graph box `v' if `v'Valid == "V", over(year) by(FLOOR_AREA_BAND) scale(0.75) + graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_floor_valid.png", replace + + graph box `v' if `v'Valid == "V", over(year) by(EE_BAND) scale(0.75) + graph export "$rpath/graphs/NEED-EULF-2014-`s'pc-box_`v'_yr_ee_valid.png", replace - di "* Check transitions (`v'Validr)" - xttrans `v'Validr, freq } } diff --git a/NEED/analyse-NEED-EULF-2014-models-v2.0.do 
b/NEED/analyse-NEED-EULF-2014-models-v2.0.do index 76c7b5f6651e8facc9aa828676466bb7c898c9cd..7896acf2a37453f987cfe57c4eb3a40b266e2c73 100644 --- a/NEED/analyse-NEED-EULF-2014-models-v2.0.do +++ b/NEED/analyse-NEED-EULF-2014-models-v2.0.do @@ -32,7 +32,7 @@ clear all capture noisily log close * written for Mac OSX - remember to change filesystem delimiter for other platforms -local home "/Users/ben/Documents" +local home "~/Documents" local proot "`home'/Work/Data/Social Science Datatsets/DECC" * for clam * local proot "`home'/Work/NEED" diff --git a/NEED/process-NEED-EULF-2014.do b/NEED/process-NEED-EULF-2014.do index 851dcff882c8221a5b1af483357f3c0bd41c958d..88de051efc6344344b39f719664ad05db5c51ab2 100644 --- a/NEED/process-NEED-EULF-2014.do +++ b/NEED/process-NEED-EULF-2014.do @@ -52,7 +52,7 @@ clear all capture noisily log close _all * written for Mac OSX - remember to change filesystem delimiter for other platforms -local home "/Users/ben/Documents" +local home "~/Documents" local proot "`home'/Work/Data/Social Science Datatsets/DECC" * for clam * local proot "`home'/Work/NEED"