Skip to content
Snippets Groups Projects
Commit dfdcd663 authored by Ben Anderson's avatar Ben Anderson
Browse files

initial archiving

parents
Branches
No related tags found
No related merge requests found
* Script to analyse DECC's NEED data
* NB this script uses 2 files derived from the original data
* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
*
* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
* b.anderson@soton.ac.uk
* (c) University of Southampton
* The Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
* http://creativecommons.org/licenses/by-nc/4.0/
* --- set-up: clear memory, close any log left open by a previous run, define paths, open this run's log ---
clear all
* capture: do not stop if no log is open; noisily: still show the message
capture noisily log close
* written for Mac OSX - remember to change filesystem delimiter for other platforms
local home "/Users/ben/Documents"
local proot "`home'/Work/Data/Social Science Datatsets/DECC"
* for clam
* local proot "`home'/Work/NEED"
* dpath = where the pre-processed NEED files are read from; rpath = where the log and result tables are written
local dpath "`proot'/NEED/End User Licence File 2014/processed"
local rpath "`proot'/results/NEED/"
* version tag that is embedded in every output file name
local version "v1"
set more off
* log file name includes the version tag and the current date ($S_DATE)
log using "`rpath'/analyse-NEED-EULF-2014-`version'-$S_DATE.smcl", replace
* use the pre-processed wide form file which contains all years of consumption data but not the constant values which are in the xwave file
use "`dpath'/need_eul_may2014_consumptionfile_wide.dta", clear
* we're going to use 2012 data only
keep HH_ID *2012*
* natural log of gas and electricity consumption
foreach fuel in Gcons Econs {
    gen log_`fuel'2012 = log(`fuel'2012)
}
* combined (gas + electricity) consumption
* rowtotal() treats a missing component (e.g. no gas) as 0
egen Allcons2012 = rowtotal(Gcons2012 Econs2012)
gen log_Allcons2012 = log(Allcons2012)
* log consumption quintiles: cut(), group(5) codes the groups 0..4
foreach fuel in Allcons Gcons Econs {
    egen quinlog_`fuel'2012 = cut(log_`fuel'2012), group(5)
}
* bring in the pre-processed cross-year (fixed value) variables, one row per HH_ID
merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile.dta"
* fix some of the variables
* single IMD variable: English value where present, otherwise the Welsh one
* (a bit dodgy as the two indices are not strictly comparable)
gen ba_imd = IMD_ENG
replace ba_imd = IMD_WALES if ba_imd == .
* the variables below must be used as category (i.) variables!!
* unknown loft depth becomes 10 -> sorts to the end of the contrasts so its effect is visible
replace LOFT_DEPTH = 10 if LOFT_DEPTH == .
* unknown installation years become 2020 for the same reason
foreach yearvar in BOILER_YEAR CWI_YEAR LI_YEAR {
    replace `yearvar' = 2020 if `yearvar' == .
}
* BOILER: convert to numeric (non-numeric values forced to missing), then missing -> 0 = no
destring BOILER, force replace
replace BOILER = 0 if BOILER == .
* --- macros that define the regression specifications used in the modelling loop ---
* household level vars
* generic_hvars = full household-level predictor list (includes property type)
* NOTE(review): WALL_CONS is the only term without an i. prefix, so it enters as a continuous variable - confirm this is intended
local generic_hvars "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_TYPE i.PROP_AGE i.EE_BAND "
* generic_hvarsnp = same list without i.PROP_TYPE, for models estimated within a single property type
local generic_hvarsnp "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_AGE i.EE_BAND "
* area level vars
local generic_rvars "i.ba_region i.ba_imd"
* define different property types
* ptypes = PROP_TYPE codes to loop over; pt<code> = short name used in stored-estimate names
local ptypes "101 102 103 104 105 106"
local pt101 "detached"
local pt102 "semi"
local pt103 "end_terr"
local pt104 "mid_terr"
local pt105 "bung"
local pt106 "flat"
* consumption variable stems to model (gas, electricity, combined)
local vars "Gcons Econs Allcons"
* For each consumption measure: fit the log-consumption model on all households,
* then separately by property type and by consumption quintile, storing each set
* of estimates and printing specification diagnostics after every fit.
* NOTE(review): i.BOILER_YEAR is already part of `generic_hvars'/`generic_hvarsnp', so the extra term below is redundant
foreach fuel of local vars {
    * all hhs model
    qui: regress log_`fuel'2012 `generic_hvars' ///
        `generic_rvars' ///
        i.BOILER_YEAR
    est store rlog_`fuel'2012
    di "* -> `fuel' estat to test for heteroskedasticity & omitted vars"
    estat ovtest
    estat hettest
    di "* -> `fuel' linktest"
    di "* if p of _hatsq < 0.05 -> mis-spec"
    di "* http://www.ats.ucla.edu/stat/stata/webbooks/logistic/chapter3/statalog3.htm"
    linktest

    * by property type - to see if rsq & coefficients vary
    foreach ptype of local ptypes {
        * short name for this property type code (e.g. 101 -> detached)
        local ptname "`pt`ptype''"
        di "* -> testing log_`fuel'2012 for `ptname'"
        qui: regress log_`fuel'2012 `generic_hvarsnp' ///
            `generic_rvars' ///
            i.BOILER_YEAR ///
            if PROP_TYPE == `ptype'
        est store rlog_`fuel'2012_`ptname'
        di "* -> `fuel' 2012 `ptname' - estat to test for heteroskedasticity & omitted vars"
        estat ovtest
        estat hettest
        di "* -> `fuel' `ptname' linktest"
        di "* if p of _hatsq < 0.05 -> mis-spec"
        di "* http://www.ats.ucla.edu/stat/stata/webbooks/logistic/chapter3/statalog3.htm"
        linktest
    }

    * for different consumption quintiles - to see if rsq & coefficients vary
    * (quintile groups are coded 0..4)
    foreach quin of numlist 0/4 {
        di "* -> testing log_`fuel'2012 for quintile: `quin'"
        qui: regress log_`fuel'2012 `generic_hvars' ///
            `generic_rvars' ///
            i.BOILER_YEAR ///
            if quinlog_`fuel'2012 == `quin'
        est store rlog_`fuel'2012q`quin'
        di "* -> quintile: `quin' - estat to test for heteroskedasticity & omitted vars"
        estat ovtest
        estat hettest
        di "* -> quintile: `quin' - linktest"
        di "* if p of _hatsq < 0.05 -> mis-spec"
        di "* http://www.ats.ucla.edu/stat/stata/webbooks/logistic/chapter3/statalog3.htm"
        linktest
    }
}
* output all the results - that's a lot of t tests!
* One table per fuel for: the all-households model, the quintile models (q*) and
* the property-type models (_*). Cells are coefficient, s.e., p value and stars.
* Every table reports the same fit statistics (r2 r2_a N ll) so that the gas,
* electricity and combined-energy models can be compared like for like.
estout rlog_Gcons2012 using "`rpath'/NEED-EULF-2014-log-gas-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Gcons2012q* using "`rpath'/NEED-EULF-2014-log-gas-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Gcons2012_* using "`rpath'/NEED-EULF-2014-log-gas-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Econs2012 using "`rpath'/NEED-EULF-2014-log-elec-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Econs2012q* using "`rpath'/NEED-EULF-2014-log-elec-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Econs2012_* using "`rpath'/NEED-EULF-2014-log-elec-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012 using "`rpath'/NEED-EULF-2014-log-energy-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012q* using "`rpath'/NEED-EULF-2014-log-energy-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
estout rlog_Allcons2012_* using "`rpath'/NEED-EULF-2014-log-energy-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
* close the log opened at the top of this script so it is flushed and not left open
log close
* Extract data from DECC extracts
* --- set-up: clear memory, close any open log, define input/output locations and run switches ---
clear all
capture log close
set more off
local where = "/Users/ben/Documents"
local projroot = "`where'/Work/Data/Social Science Datatsets/DECC"
* dpath = raw DECC LSOA energy files (one sub-folder per year); rpath = log and graph output
local dpath = "`projroot'/LSOA Energy Data"
local dofiles = "`projroot'/do_files"
local rpath = "`projroot'/LSOA Energy Data/processed"
* census data location
local cpath = "`where'/Work/Data/Social Science Datatsets/UK Census/2001Data"
* English Indices of Deprivation 2010, postcode look-up tables and urban/rural classification
local eimdpath = "`where'/Work/Data/Social Science Datatsets/Indices of Deprivation/English ID 2010"
local pcluts = "`where'/Work/Data/GIS data/UK Postcodes/pcluts_2007nov_processed"
local urbpath = "`where'/Work/Data/Social Science Datatsets/UK Urban Rural Classification"
log using "`rpath'/extract-DECC-LSOA-energy-data-from-excel-$S_DATE.smcl", replace
local years "2007 2008 2009 2010"
* set to 1 to get set-up etc to run
* (with a switch at 0 the corresponding processed .dta file must already exist for the merges further down)
* NOTE(review): do_2007 is defined but no 2007 section is visible in this script
local do_2007 = 0
local do_2008 = 0
local do_2009 = 0
local do_2010 = 0
local do_matrix_graphs = 0
local do_bar_graphs = 1
* 2007 = experimental pilots
* variables used to break down the descriptive tables near the end of the script
local dist_testvars "morphologyname imd2010_dec"
* 2008
* Import the 2008 electricity workbook (one sheet per Government Office Region),
* stack the sheets, convert to numeric, de-duplicate by LSOA and save.
if `do_2008' {
    * 2008 - data in GOR tab format!!
    local y = "2008"
    * sheet names, indexed 1..10
    local gor1 "North West"
    local gor2 "West Midlands"
    local gor3 "East Midlands"
    local gor4 "North East"
    local gor5 "Yorkshire and Humber"
    local gor6 "East of England"
    local gor7 "Greater London"
    local gor8 "South East"
    local gor9 "South West"
    local gor10 "Wales"
    * one temporary .dta per sheet
    * NOTE(review): import errors are captured, so a failed import is still followed by save - check the log
    forvalues g = 1/10 {
        di "* Importing GOR `g'"
        capture noisily import excel "`dpath'/`y'/790-llsoa-electricity-`y'-england-wales.xls", sheet("`gor`g''") cellrange(A13) allstring firstrow clear
        save "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta", replace
    }
    * start with first
    use "`dpath'/`y'/790-llsoa-electricity-`y'-gor1.dta", clear
    * append the others
    forvalues g = 2/10 {
        append using "`dpath'/`y'/790-llsoa-electricity-`y'-gor`g'.dta"
    }
    * everything was imported as string (allstring); force non-numeric cells to missing
    destring Ordinarydomesticconsumption Economy7consumption Numberofordinarydomesticmete ///
        Numberofeconomy7meters Averageordinarydomesticconsum Averageeconomy7consumption, replace float force
    * zonecode is the common merge key across years
    rename LLSOAcode lsoacode_`y'
    gen zonecode = lsoacode_`y'
    * duplicates? Caused by disclosure control?
    duplicates report zonecode
    duplicates tag zonecode, gen(dups_`y')
    gen gor_`y' = GOR
    lab var gor_`y' "Government Office Region (`y' data)"
    *lab def gor_`y' 1 "North East" 2 "North West" 3 "Yorkshire and Humber" 4 "East Midlands" 5 "West Midlands" 6 "East of England" 7 "Greater London" 8 "South East" 9 "South West" 10 "Wales"
    *lab val gor_`y' gor_`y'
    tab gor_`y' dups_`y', mi
    * keep the first row of each duplicated zonecode
    duplicates drop zonecode, force
    * harmonised, year-suffixed names
    rename Ordinarydomesticconsumption OrdConsumption_`y'
    rename Economy7consumption Econ7consumption_`y'
    rename Numberofordinarydomesticmete NumOrdMeters_`y'
    rename Numberofeconomy7meters NumEcon7Meters_`y'
    rename Averageordinarydomesticconsum AvgOrdConsumption_`y'
    rename Averageeconomy7consumption AvgEcon7consumption_`y'
    lab var OrdConsumption_`y' "Ordinary domestic consumption `y'"
    lab var Econ7consumption_`y' "Economy 7 consumption `y'"
    lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'"
    lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'"
    lab var AvgOrdConsumption_`y' "Average domestic consumption `y'"
    lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'"
    save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace
    di "* Done `y'"
    di "*****************************"
}
* Import the 2009 electricity .csv, harmonise names, de-duplicate by LSOA and save.
if `do_2009' {
    * 2009 - data in .csv files
    local y = "2009"
    insheet using "`dpath'/`y'/2347-llsoa-domestic-elec-raw.csv", clear
    * zonecode is the common merge key across years
    rename llsoa lsoacode_`y'
    gen zonecode = lsoacode_`y'
    * harmonised, year-suffixed names
    rename consumption_dom OrdConsumption_`y'
    rename consumption_e7 Econ7consumption_`y'
    rename no_mpan_dom NumOrdMeters_`y'
    rename no_mpan_e7 NumEcon7Meters_`y'
    rename av_consumption_dom AvgOrdConsumption_`y'
    rename av_consumption_e7 AvgEcon7consumption_`y'
    lab var OrdConsumption_`y' "Ordinary domestic consumption `y'"
    lab var Econ7consumption_`y' "Economy 7 consumption `y'"
    lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'"
    lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'"
    lab var AvgOrdConsumption_`y' "Average domestic consumption `y'"
    lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'"
    * new for 2009
    rename no_dis_dom no_dis_dom_`y'
    rename no_dis_e7 no_dis_e7_`y'
    * duplicates? Caused by disclosure control?
    duplicates report zonecode
    duplicates tag zonecode, gen(dups_`y')
    tab la_name dups_`y', mi
    * keep the first row of each duplicated zonecode
    duplicates drop zonecode, force
    save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace
    di "* Done `y'"
    di "*****************************"
}
* Import the 2010 electricity workbook, harmonise names, de-duplicate by LSOA and save.
if `do_2010' {
    * 2010 - data
    local y = "2010"
    import excel "`dpath'/`y'/4813-llsoa-domestic-elec-est-`y'-fixed.xls", sheet("LLSOA Electricity Domestic") firstrow clear
    * insheet using "`dpath'/`y'/4813-llsoa-domestic-elec-est-2010.csv", clear comma
    * zonecode is the common merge key across years
    rename lsoa lsoacode_`y'
    gen zonecode = lsoacode_`y'
    * harmonised, year-suffixed names
    rename Ordinarydomesticconsumption OrdConsumption_`y'
    rename Economy7consumption Econ7consumption_`y'
    rename Numberofordinarydomesticmeters NumOrdMeters_`y'
    rename Numberofeconomy7meters NumEcon7Meters_`y'
    rename Averageordinarydomesticconsumption AvgOrdConsumption_`y'
    rename Averageeconomy7consumption AvgEcon7consumption_`y'
    lab var OrdConsumption_`y' "Ordinary domestic consumption `y'"
    lab var Econ7consumption_`y' "Economy 7 consumption `y'"
    lab var NumOrdMeters_`y' "Number of ordinary domestic meters `y'"
    lab var NumEcon7Meters_`y' "Number of Economy 7 meters `y'"
    lab var AvgOrdConsumption_`y' "Average domestic consumption `y'"
    lab var AvgEcon7consumption_`y' "Average Economy 7 consumption `y'"
    * duplicates? Caused by disclosure control?
    duplicates report zonecode
    duplicates tag zonecode, gen(dups_`y')
    tab la_name dups_`y', mi
    * keep the first row of each duplicated zonecode
    duplicates drop zonecode, force
    save "`dpath'/processed/llsoa-electricity-`y'-all.dta", replace
    di "* Done `y'"
    di "*****************************"
}
* merge the files starting with baseline LSOA geography file so we can see what is missing
* every merge is 1:1 on zonecode and writes its own match indicator (m_*) instead of the default _merge
use "`urbpath'/Eng_Wales/lsoa/RUURB_2005_MAR_LSOA_EW.dta", clear
merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_ew_lsoacode.dta", gen(m_lsoa_postcode)
* English IMD 2010
* non-matches will be Wales
merge 1:1 zonecode using "`eimdpath'/ID-2010-indices-domains.dta", gen(m_lsoa_eimd)
* English address & postcode counts
merge 1:1 zonecode using "`pcluts'/NSPDF_NOV_2007_UK_1M_uk_lsoacode_addr_counts.dta", gen(m_address_counts)
* now DECC data - will be non-matches due to aggregation/non-disclosure
merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2008-all.dta", gen(m_elec_2008)
merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2009-all.dta", gen(m_elec_2009)
merge 1:1 zonecode using "`dpath'/processed/llsoa-electricity-2010-all.dta", gen(m_elec_2010)
* first line is junk for some reason
* list the first two rows in the log for inspection, then drop row 1
li in 1/2
drop in 1
* test merges
* tabulate each match indicator to see how many LSOAs matched in each merge
foreach m of varlist m_* {
tab `m'
}
* summary statistics for every year-suffixed variable
su *_2008 *_2009 *_2010
* For each consumption measure: create absolute and percentage changes between
* years, then flag and list LSOAs whose value is negative in any year.
local testvars "OrdConsumption Econ7consumption AvgOrdConsumption AvgEcon7consumption"
local years "2008 2009 2010"
local diffyears "2008 2009"
foreach v of local testvars {
    * each pair is "<later yy> <earlier yy>": 2009-2008, 2010-2009, 2010-2008
    foreach pair in "09 08" "10 09" "10 08" {
        local late : word 1 of `pair'
        local early : word 2 of `pair'
        * absolute change
        gen `v'_`late'_`early'_dif = `v'_20`late' - `v'_20`early'
        lab var `v'_`late'_`early'_dif "Change in `v' (20`late' - 20`early')"
        * change as a percentage of the earlier year
        gen `v'_`late'_`early'_difpc = 100*(`v'_`late'_`early'_dif/`v'_20`early')
        lab var `v'_`late'_`early'_difpc "Change in `v' (20`late' - 20`early') as % of 20`early'"
    }
    foreach y of local years {
        di "****************"
        di "* Testing `v' for `y'"
        di "*"
        * z<var>_<year> = 1 when the value is negative, 0 otherwise
        gen z`v'_`y' = 0
        lab var z`v'_`y' "Value is less than zero (`v', `y')"
        replace z`v'_`y' = 1 if `v'_`y' < 0
        * How many LSOAs have negative values?
        table regionname morphologyname z`v'_`y'
        * which LSOAs are they?
        li regionname districtname lowersoacode NumOrdMeters_`y' NumEcon7Meters_`y' z`v'_`y' `v'_`y' if z`v'_`y' == 1, sep(0) noobs
        di "*"
        di "* End testing `v' for `y'"
        di "****************"
    }
    di "****************"
}
* Scatterplot matrices: one per measure across all its year/difference variables,
* plus one comparing ordinary meter counts with the address counts.
if `do_matrix_graphs' {
    * test year change
    foreach stem in OrdConsumption Econ7consumption NumOrdMeters NumEcon7Meters AvgOrdConsumption AvgEcon7consumption {
        graph matrix `stem'_*, msize(tiny) name(`stem') half scale(0.75)
        graph export "`rpath'/matrix-`stem'.png", replace
    }
    * compare with address counts
    graph matrix NumOrdMeters_* g_uk_address_count g_uk_deliverypoint_count, msize(tiny) name(address_counts) half scale(0.75)
    graph export "`rpath'/matrix-ord-meter-address-counts.png", replace
}
* Horizontal bar charts of all percentage-change variables (*_difpc),
* split by settlement morphology and by IMD 2010 decile.
if `do_bar_graphs' {
    graph hbar *_difpc, by(morphologyname) name(hbar_morph)
    graph export "`rpath'/hbar-difpc-morphologyname.png", replace
    graph hbar *_difpc, by(imd2010_dec) name(hbar_imd2010_dec)
    * file name follows the same hbar-difpc-<by variable> pattern as the morphology chart
    graph export "`rpath'/hbar-difpc-imd2010_dec.png", replace
}
* drop every variable whose name starts with v
* NOTE(review): presumably unnamed v1, v2, ... columns left over from the imports - confirm nothing else matches
drop v*
* summarise the negative-value flags and the change variables
su z*
su *dif*
* meter counts as a proportion of the address count and of the delivery point count, per year
local vars = "NumOrdMeters NumEcon7Meters"
local years "2008 2009 2010"
foreach v of local vars {
    forvalues y = 2008/2010 {
        gen `v'_`y'_addr_ppn = `v'_`y'/g_uk_address_count
        gen `v'_`y'_delptc_ppn = `v'_`y'/g_uk_deliverypoint_count
    }
}
* Descriptive tables: every measure (all years and derived variables that share
* the stem) broken down by each variable in `dist_testvars'.
local vars "OrdConsumption_ Econ7consumption_ AvgOrdConsumption_ AvgEcon7consumption_ NumOrdMeters_ NumEcon7Meters_"
foreach dv of local dist_testvars {
    di "* Testing by `dv'"
    foreach v of local vars {
        di "* -> Testing `v'* "
        tabstat `v'*, by(`dv')
    }
    * meter counts as proportions of address / delivery point counts
    di "* -> Testing NumMeters_* "
    tabstat *Meters_*_addr_ppn , by(`dv')
    tabstat *Meters_*_delptc_ppn , by(`dv')
}
* Export a slimmed-down geography + electricity extract (csv, South West csv, xls),
* then save the complete working dataset and close the log.
preserve
* *200* only matches the 2008 and 2009 variable names, so the 2010 variables are
* kept explicitly with *_2010* - otherwise the "all-years" files lose the 2010 levels
keep zonecode regioncode regionname countycode countyname districtcode districtname *200* *_2010* *dif*
outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo.csv", comma replace
outsheet using "`dpath'/processed/llsoa-electricity-all-years-England-geo-southwest.csv" if regionname == "South West", comma replace
export excel using "`dpath'/processed/llsoa-electricity-all-years-England-geo.xls", sheet("data") firstrow(variables) replace
* back to the full dataset (all variables) before saving the .dta
restore
save "`dpath'/processed/llsoa-electricity-all-years-England-geo.dta", replace
log close
* Script to turn original wide 2014 EULF version of DECC's NEED data into:
* 1. a wide form xwave file containing the fixed value variables
* 2. a wide form file containing just the yearly consumption variables (linked to 1. via HH_ID)
* 3. a long form file containing just the yearly consumption variables (linked to 1. via HH_ID)
* 4. Create codebooks from the above
* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
*
* Notes:
* This dataset is a sample of just over 4 million households which have had an Energy Performance Certificate from the full NEED 'all dwellings' dataset
* Is this all those who have had an EPC or a random sample of all those who've had an EPC?
* Sample bias is unknown - which kinds of dwellings have an EPC?
* Gcons<year>valid variable has undefined labels: G, L, M = ? Presumably 0 = off gas & V = valid?
* ideally DECC should set missing to -99 to aid re-coding and avoid unpleasant surprises in naive analysis!
* Author: Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
* b.anderson@soton.ac.uk
* (c) University of Southampton
* The Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
* http://creativecommons.org/licenses/by-nc/4.0/
* --- set-up: clear memory, close all logs, define paths, open the main log, set the step switches ---
clear all
capture noisily log close _all
* written for Mac OSX - remember to change filesystem delimiter for other platforms
local home "/Users/ben/Documents"
local proot "`home'/Work/Data/Social Science Datatsets/DECC"
* for clam
* local proot "`home'/Work/NEED"
* note: dpath ends with a trailing slash
local dpath "`proot'/NEED/End User Licence File 2014/"
* NB this is the 2014 EULF we're using
local ifile "need_eul_may2014"
* full path of the original data file, including the .dta extension
local dfile_orig "`dpath'UKDA-7518-stata11/stata11/`ifile'.dta"
local version "v1"
set more off
* named log (main) so the codebook logs can be opened and closed alongside it
log using "`dpath'/processed/process-NEED-EULF-2014-`version'-$S_DATE.smcl", replace name(main)
* create codebook & some descriptives
local create_codebook = 1
* create wide form fixed file with (supposedly) unchanging data & a separate 'wide' consumption data file for cross-sectional analysis
local create_xwavefile = 1
* create long form file with wave (yearly) data - be careful, this takes a long time due to large memory use!
local create_longfile = 0
* Write a description and codebook of the original data file to its own log (cb),
* pausing the main log while doing so.
if `create_codebook' {
* describe the original file and write its codebook to a separate log
log off main
log using "`dpath'/processed/codebook-NEED-EULF-2014-`version'-$S_DATE.smcl", replace name(cb)
use "`dfile_orig'", clear
desc
di "** no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?"
codebook
* close the codebook log and resume the main log
log close cb
log on main
}
* Split the original file into (a) a wide consumption file (HH_ID + yearly Gcons*/Econs*)
* and (b) the cross-wave "xwave" file of fixed household variables, with some
* recodes applied and a labelled region variable added. A codebook of (b) is logged.
if `create_xwavefile' {
    * create the file with data that (notionally) doesn't change
    use "`dfile_orig'", clear
    * create a wide consumption file
    preserve
    keep HH_ID Gcons* Econs*
    save "`dpath'/processed/`ifile'_consumptionfile_wide.dta", replace
    restore
    * what remains is the fixed (cross-wave) data
    drop Gcons* Econs*
    * fix some mis-codings
    * make the flags numeric 0/1: anything that is not 1 (including missing) becomes 0
    local vars "E7Flag2012 CWI LI"
    foreach v of local vars {
        destring `v', force replace
        replace `v' = 0 if `v' !=1
        label def `v' 0 "No or N/A" 1 "Yes"
        label val `v' `v'
    }
    * turn '99' into missing - ideally missing should be -99 to aid re-coding and avoid unpleasant surprises in naive analysis!
    * each variable is recoded on its own 99 code
    replace FP_ENG = . if FP_ENG == 99
    replace LOFT_DEPTH = . if LOFT_DEPTH == 99
    * no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?
    tabstat IMD_WALES, by(REGION) s(mean min max n)
    * there seem to be some welsh LSOAs allocated to English GORs?
    tabstat IMD_ENG, by(REGION) s(mean min max n)
    * there seem to be some English LSOAs allocated to Wales?
    tabstat FP_ENG, by(REGION)
    * REGION is ONS admin codes
    * create a new variable with meaningful labels
    gen ba_region = 1 if REGION == "E12000001"
    replace ba_region = 2 if REGION == "E12000002"
    replace ba_region = 3 if REGION == "E12000003"
    replace ba_region = 4 if REGION == "E12000004"
    replace ba_region = 5 if REGION == "E12000005"
    replace ba_region = 6 if REGION == "E12000006"
    replace ba_region = 7 if REGION == "E12000007"
    replace ba_region = 8 if REGION == "E12000008"
    replace ba_region = 9 if REGION == "E12000009"
    replace ba_region = 10 if REGION == "W99999999"
    lab var ba_region "former Govt Office region (labelled)"
    * http://www.ons.gov.uk/ons/guide-method/geography/beginner-s-guide/administrative/england/government-office-regions/index.html
    lab def ba_region 1 "North East" 2 "North West" 3 "Yorkshire & The Humber" 4 "East Midlands" ///
        5 "West Midlands" 6 "East of England" 7 "London" 8 "South East" 9 "South West" 10 "Wales"
    lab val ba_region ba_region
    compress
    * codebook of the xwave file goes to its own log; main log is paused meanwhile
    log off main
    log using "`dpath'/processed/codebook-NEED-EULF-2014-`ifile'_xwavefile-`version'-$S_DATE.smcl", replace name(cb_xwave)
    desc
    di "** no idea what G, L, M mean in the 'valid' variables - presumably 0 = off gas & V = valid?"
    di "** urban/rural would be helpful"
    codebook
    save "`dpath'/processed/`ifile'_xwavefile.dta", replace
    log close cb_xwave
    log on main
}
* Build the long-form consumption file: one row per household (HH_ID) per year,
* with Gcons, GconsValid, Econs and EconsValid as columns.
if `create_longfile' {
    * create the long file with as few vars as possible (quicker)
    * still takes a while...
    * `dfile_orig' already ends in .dta, so it is used as-is (as in the other sections)
    use "`dfile_orig'", clear
    keep HH_ID Gcons* Econs*
    * panel vars:
    * Gcons2005 Gcons2005Valid Econs2005 Econs2005Valid -> 2012
    local vars "Gcons Econs"
    foreach v of local vars {
        di "* Renaming -> `v'"
        foreach y of numlist 2005/2012 {
            di "* -> `v' (`y')"
            * put year on the end so reshape works
            * (rename in place rather than copying the variable and dropping the original)
            rename `v'`y'Valid `v'Valid`y'
        }
    }
    * this takes a LONG time - avoid running many times!
    * the numeric suffix of each stub becomes the variable year
    reshape long Gcons GconsValid Econs EconsValid, i(HH_ID) j(year)
    compress
    save "`dpath'/processed/`ifile'_consumptionfile_long.dta", replace
}
* Disabled: merge of the long consumption file with the xwave file.
* NOTE(review): the disabled code refers to `dfile', which is not defined in the visible part of this
* script (only `ifile' and `dfile_orig' are), and its paths omit the processed/ folder that the long
* and xwave files are saved to - both need fixing before this is re-enabled.
/*
* THIS TAKES AGES and creates a 1.5 GB file!!!
* now just merge them
* start with long file which may or may not have just been re-created
use "`dpath'/`dfile'_consumptionfile_long.dta", clear
merge m:1 HH_ID using "`dpath'/`dfile'_xwavefile.dta"
save "`dpath'/`dfile'_consumptionfile_long_complete.dta", replace
*/
* close every log that is still open (the main log and any codebook logs)
log close _all
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment