Commit f513fd3d authored by Ben Anderson

altered globals

parent d1d24f0e
@@ -4,6 +4,8 @@
 * - Using the Commission for Energy Regulation (CER)'s Irish Smart Meter Trial data
 * - http://www.ucd.ie/issda/data/commissionforenergyregulationcer/
+* processes the original data for further use
 * This work was funded by RCUK through the ESRC's Transformative Social Science Programme via the
 * "Census 2022: Transforming Small Area Socio-Economic Indicators through 'Big Data'" Project
 * - http://gtr.rcuk.ac.uk/project/2D2CD798-4F04-4399-B1AF-D810A233DD21
@@ -32,7 +34,7 @@ global where "/Users/ben/Documents/Work"
 global proot "$where/Projects/ESRC-Transformative-Census2022"
 global rfiles "$proot/results/CER-Irish-SM-Trial"
 * original files
-global odfiles "$where/Data/Social Science Datatsets/CER Smart Metering Project"
+global odfiles "$where/Data/Social Science Datatsets/CER Smart Metering Project/data"
 * processed files
 global pdfiles "$proot/data/cer"
@@ -53,7 +55,7 @@ timer on 1
 ************************************
 ************************************
 * start with the pre-trial survey
-use "$odfiles/data/processed/Smart meters Residential pre-trial survey data.dta"
+import excel "$odfiles/original/Smart meters Residential pre-trial survey data.xlsx", sheet("Sheet1") firstrow clear
 ********
 * test age, sex, employment status of chief income earner
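
Since this hunk replaces a use of the processed .dta with an import excel of the original workbook, a quick post-import check can catch a wrong sheet or header row before the new save overwrites anything downstream. A minimal sketch, not part of the commit, assuming the survey sheet carries a household identifier column named ID:

* hedged sketch: sanity-check the Excel import that replaces the old .dta load
import excel "$odfiles/original/Smart meters Residential pre-trial survey data.xlsx", ///
	sheet("Sheet1") firstrow clear
describe, short          // confirm the expected number of variables and rows arrived
isid ID                  // stop with an error if ID does not uniquely identify households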
@@ -162,162 +164,27 @@ recode Question310Whatistheemploym (1/3=1) (4/5=2) (6=3) (7=4), gen(ba_empl)
 lab def ba_empl 1 "In work" 2 "Unemployed" 3 "Retired" 4 "Caring for relative or family"
 lab val ba_empl ba_empl
-save "$pdfiles/Smart meters Residential pre-trial survey data-$version.dta", replace
+save "$odfiles/processed/Smart meters Residential pre-trial survey data-$version.dta", replace
 ************************************
 ************************************
-* load in the two cluster files, merge and save
-insheet using "$pdfiles/October 2009 summaries/OctHH_wkend_clusterID.txt", tab clear
-rename fitcluster wkend_fitcluster
-lab var wkend_fitcluster "Weekend clusters"
-rename id ID
-compress
-save "$pdfiles/October 2009 summaries/OctHH_wkend_clusterID.dta", replace
-insheet using "$pdfiles/October 2009 summaries/OctHH_midwk_clusterID.txt", tab clear
-rename fitcluster midwk_fitcluster
-lab var midwk_fitcluster "Mid-week clusters"
-rename id ID
-compress
-save "$pdfiles/October 2009 summaries/OctHH_midwk_clusterID.dta", replace
-merge 1:1 ID using "$pdfiles/October 2009 summaries/OctHH_wkend_clusterID.dta", nogen
-* overlap between clusters?
-tab wkend_fitcluster midwk_fitcluster, mi
-save "$pdfiles/October 2009 summaries/OctHH_clusterIDs.dta", replace
-merge 1:1 ID using "$pdfiles/Smart meters Residential pre-trial survey data-$version.dta"
-* so 746 households don't match to the Oct 2009 sample leaving us with 3,486
-gen oct_sample = 0
-replace oct_sample = 1 if _merge == 3
-save "$pdfiles/Oct-2009-summaries-survey-$version.dta", replace
-******************************
-* load in Sharon's daily summaries for weekdays (derived from the raw data)
-* this one has spaces as delimiter
-insheet using "$pdfiles/October 2009 summaries/CER_OctHH_midwk_long.txt", delim(" ") clear
-destring ecf lf, replace force
-gen midweek = 1
-compress
-save "$pdfiles/October 2009 summaries/CER_OctHH_midwk_long.dta", replace
-* this one has tabs!
-insheet using "$pdfiles/October 2009 summaries/CER_OctHH_wkend_long.txt", tab clear
-destring ecf lf, replace force
-gen midweek = 0
-compress
-save "$pdfiles/October 2009 summaries/CER_OctHH_wkend_long.dta", replace
-* append mid-week
-append using "$pdfiles/October 2009 summaries/CER_OctHH_midwk_long.dta"
-rename id ID
-* remove the dates that are NOT October 2009 (why are they in there anyway??)
-drop if dateoct > 300
-* add survey & cluster data
-merge m:1 ID using "$pdfiles/Oct-2009-summaries-survey-$version.dta", gen(m_survey)
-* some survey respondents not in the October data, some in October data not in survey
-* keep what matches
-keep if m_survey == 3
-* save
-save "$pdfiles/Oct-2009-daily-summaries-survey-$version.dta", replace
-*********************
 * Switch to 1/2 hour level data
 * raw data
-insheet using "$odfiles/data/original/HH2009_long.txt", delimiter(" ") names clear
-rename HHID ID
+insheet using "$odfiles/original/HH2009_long.txt", delimiter(" ") names clear
+rename hhid ID
 rename kw kwh
 tostring ds, force generate(ts_ds)
 gen date = substr(ts_ds,1,3)
 gen halfhour = substr(ts_ds,4,5)
-* mid-week
-insheet using "$pdfiles/CER_OctHH_data/CER_OctHH_mdwk_30min.txt", tab clear
-li in 1/5
-* the columns are munched
-drop id
-rename ds ID
-lab var ID "ID"
-rename kw timestamp
-lab var timestamp "timestamp (original format)"
-rename dateoct kwh
-lab var kwh "kWh"
-rename v5 date
-lab var date "date (original format)"
-li in 1/5
-* need to weed out the October 2010 cases
-keep if date < 365
-tostring timestamp, gen(tmp_timestamp) force
-gen halfhour = substr(tmp_timestamp,4,5)
-tab date
-* how many households do we have in this sample?
-* should be same as from the clustering
-preserve
-collapse (mean) kwh , by(ID)
-desc
-restore
-gen midweek = 1
-lab def midweek 0 "Saturday/Sunday" 1 "Tuesday-Thursday"
-lab val midweek midweek
-drop tmp_timestamp
-save "$pdfiles/CER_OctHH_data/CER_Oct2009HH_mdwk_30min.dta", replace
-*******************************
-* load in weekends
-insheet using "$pdfiles/CER_OctHH_data/CER_OctHH_wkend_30min.txt", tab clear
-li in 1/5
-* the columns are munched again
-drop id
-rename ds ID
-lab var ID "ID"
-rename kw timestamp
-lab var timestamp "timestamp (original format)"
-rename dateoct kwh
-lab var kwh "kWh"
-rename v5 date
-lab var date "date (original format)"
-li in 1/5
-* need to weed out the October 2010 cases
-keep if date < 365
-tostring timestamp, gen(tmp_timestamp) force
-gen halfhour = substr(tmp_timestamp,4,5)
-gen midweek = 0
-lab val midweek midweek
-drop tmp_timestamp
-save "$pdfiles/CER_OctHH_data/CER_Oct2009HH_wkend_30min.dta", replace
-*********************************
-* append mid week to weekend
-append using "$pdfiles/CER_OctHH_data/CER_Oct2009HH_mdwk_30min.dta"
-* add the clustering results
-merge m:1 ID using "$pdfiles/October 2009 summaries/OctHH_clusterIDs.dta", gen(m_cluster)
 * fix dates properly
 * we know date = 1 = Jan 1st 2009
 gen double s_date = mdy(1, 1, 2009)
 format s_date %td
 * add the number of days but subtract 1 otherwise we will start on 2/1/2009!
+destring date, force replace
 replace s_date = s_date + (date - 1)
 * create day of week (remember in stata 0 = Sunday)
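
The surviving comments spell out the date arithmetic: day index 1 means 1 January 2009, so the offset added to mdy(1, 1, 2009) has to be (date - 1), and the added destring is needed because date now arrives as a substr() string. A minimal standalone sketch, illustration only, of the same arithmetic on dummy day indices:

* illustration only: day index 1 should map to 01jan2009 once 1 is subtracted
clear
set obs 3
gen date = _n                                // dummy day indices 1, 2, 3
gen double s_date = mdy(1, 1, 2009) + (date - 1)
format s_date %td
list date s_date, noobs                      // 01jan2009, 02jan2009, 03jan2009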
@@ -344,7 +211,7 @@ gen double s_datetime = dhms(s_date, hour, mins, sec)
 format s_datetime %tc
 * add the survey data (makes big file) but only keep what we need
-merge m:1 ID using "$pdfiles/Smart meters Residential pre-trial survey data-$version.dta", gen(m_survey) ///
+merge m:1 ID using "$odfiles/processed/Smart meters Residential pre-trial survey data-$version.dta", gen(m_survey) ///
 	keepusing(ba_*)
 sort ID s_datetime
@@ -352,9 +219,9 @@ sort ID s_datetime
 * check
 li ID date halfhour s_* in 1/12, sep(2)
-drop m_cluster timestamp date ds_halfhour halfhour hour mins sec
-save "$pdfiles/CER_OctHH_data/CER_Oct2009HH_30min_survey.dta", replace
+drop timestamp date ds_halfhour halfhour hour mins sec
+save "$odfiles/processed/HH2009_long_survey.dta", replace
 timer off 1
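
The dhms() call is only visible in the hunk context above, and the diff does not show how the half-hour slot is turned into the hour, mins and sec it expects. A minimal standalone sketch of one plausible mapping, assuming slots run 1-48 with slot 1 starting at 00:00 (an assumption, not something this commit shows):

* hypothetical mapping from a 1-48 half-hour index to the arguments dhms() expects
clear
set obs 48
gen halfhour = _n                            // half-hour slots 1..48
gen hour = floor((halfhour - 1) / 2)         // 0..23
gen mins = mod(halfhour - 1, 2) * 30         // 0 or 30
gen sec = 0
gen double s_datetime = dhms(mdy(1, 1, 2009), hour, mins, sec)
format s_datetime %tc
list halfhour s_datetime in 1/4, noobs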