Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
Census2022
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SERG
Census2022
Commits
f7517d2f
Commit
f7517d2f
authored
Mar 14, 2016
by
Ben Anderson
Browse files
Options
Downloads
Patches
Plain Diff
removed processing for data we don't use & added summaries by npeople,heating type & by date
parent
6a66e110
Branches
main
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
Census2022_CER_CEUS_paper_analysis.R
+112
-133
112 additions, 133 deletions
Census2022_CER_CEUS_paper_analysis.R
with
112 additions
and
133 deletions
Census2022_CER_CEUS_paper_analysis.R
+
112
−
133
View file @
f7517d2f
...
...
@@ -28,20 +28,20 @@
###########################################
# Housekeeping ----
# clear out all old objects etc to avoid confusion
rm
(
list
=
ls
())
#
rm(list = ls())
library
(
data.table
)
# for super fast tables
library
(
foreign
)
# to import the survey data - we've cheated and used STATA to read in and save out the .xlsx file as it
# converts q names and is a lot quicker!
library
(
psych
)
# useful for skew etc
###########################################
# Load data
----
# Load
ing
data
cerPath
<-
"~/Documents/Work/Data/CER Smart Metering Project/"
samplePath
<-
"data/original/CER_both/CER Electricity Revised March 2012/"
dPath
<-
"data/processed/"
# load pre-trial survey
# use
# Load pre-trial survey ----
cerResPreSurvey
<-
paste0
(
cerPath
,
dPath
,
"Smart meters Residential pre-trial survey data.dta"
)
...
...
@@ -60,10 +60,47 @@ cerResPreSurveyDT$baCompletedPreSurvey <- factor(cerResPreSurveyDT$baCompletedPr
with
(
cerResPreSurveyDT
,
table
(
baCompletedPreSurvey
,
question200pleaserecordsexf
,
useNA
=
"always"
)
)
# create heating variable
cerResPreSurveyDT
$
baHeat
<-
ifelse
(
cerResPreSurveyDT
$
question470whichofthefollow
==
1
,
"Electricity_storage"
,
NA
)
cerResPreSurveyDT
$
baHeat
<-
ifelse
(
cerResPreSurveyDT
$
aq
==
1
,
"Electricity_plug_in"
,
cerResPreSurveyDT
$
baHeat
)
cerResPreSurveyDT
$
baHeat
<-
ifelse
(
cerResPreSurveyDT
$
ar
==
1
,
"Gas"
,
cerResPreSurveyDT
$
baHeat
)
cerResPreSurveyDT
$
baHeat
<-
ifelse
(
cerResPreSurveyDT
$
as
==
1
,
"Oil"
,
cerResPreSurveyDT
$
baHeat
)
cerResPreSurveyDT
$
baHeat
<-
ifelse
(
cerResPreSurveyDT
$
at
==
1
,
"Solid_fuels"
,
cerResPreSurveyDT
$
baHeat
)
cerResPreSurveyDT
$
baHeat
<-
ifelse
(
cerResPreSurveyDT
$
au
==
1
,
"Renewables"
,
cerResPreSurveyDT
$
baHeat
)
cerResPreSurveyDT
$
baHeat
<-
ifelse
(
cerResPreSurveyDT
$
av
==
1
,
"Other"
,
cerResPreSurveyDT
$
baHeat
)
# create number of people variable
cerResPreSurveyDT
$
baNpeople
[
cerResPreSurveyDT
$
question410whatbestdescribes
==
1
]
<-
"1"
cerResPreSurveyDT
$
baNpeople
[
cerResPreSurveyDT
$
question420howmanypeopleove
==
1
]
<-
"2"
cerResPreSurveyDT
$
baNpeople
[
cerResPreSurveyDT
$
question420howmanypeopleove
==
2
]
<-
"3"
cerResPreSurveyDT
$
baNpeople
[
cerResPreSurveyDT
$
question420howmanypeopleove
==
3
]
<-
"4"
cerResPreSurveyDT
$
baNpeople
[
cerResPreSurveyDT
$
question420howmanypeopleove
>=
4
]
<-
"5+"
with
(
cerResPreSurveyDT
,
table
(
baNpeople
,
baCompletedPreSurvey
)
)
# create a small subset of the pre trial survey
cerResPreSurveyDTred
<-
cerResPreSurveyDT
[,
.
(
ID
,
baCompletedPreSurvey
)]
cerResPreSurveyDTred
<-
cerResPreSurveyDT
[,
.
(
ID
,
baCompletedPreSurvey
,
baHeat
,
baNpeople
)]
#
l
oad post-trial survey
#
L
oad post-trial survey
----
cerResPostSurvey
<-
paste0
(
cerPath
,
dPath
,
"Smart meters Residential post-trial survey data.dta"
)
...
...
@@ -101,50 +138,20 @@ cerSampleAllocDT$ba_allocation_code <- factor(cerSampleAllocDT$ba_allocation_cod
)
)
#
l
oad 2009
data
#
L
oad
Oct
2009
consumption data ----
cerOct09
<-
paste0
(
cerPath
,
dPath
,
"CER_October_2009_residential.csv"
)
cerOct09DT
<-
fread
(
cerOct09
)
setkey
(
cerOct09DT
,
ID
)
cerDec09
<-
paste0
(
cerPath
,
dPath
,
"CER_December_2009_residential.csv"
)
cerDec09DT
<-
fread
(
cerDec09
)
setkey
(
cerDec09DT
,
ID
)
# load 2010 data
cerOct10
<-
paste0
(
cerPath
,
dPath
,
"CER_October_2010_residential.csv"
)
cerOct10DT
<-
fread
(
cerOct10
)
setkey
(
cerOct10DT
,
ID
)
cerDec10
<-
paste0
(
cerPath
,
dPath
,
"CER_December_2010_residential.csv"
)
cerDec10DT
<-
fread
(
cerDec10
)
setkey
(
cerDec10DT
,
ID
)
# Check we just have residential ----
#
2009
#
Use Code on consumption data
with
(
cerOct09DT
,
table
(
Code
)
)
with
(
cerDec09DT
,
table
(
Code
)
)
# 2010
with
(
cerOct10DT
,
table
(
Code
)
)
with
(
cerDec10DT
,
table
(
Code
)
)
# Do additional check using sample allocation file
# 2009
with
(
cerOct09DT
[
cerSampleAllocDT
],
table
(
Code
,
ba_allocation_code
,
useNA
=
"always"
)
)
...
...
@@ -160,51 +167,8 @@ print(paste0(
)
)
with
(
cerDec09DT
[
cerSampleAllocDT
],
table
(
Code
,
ba_allocation_code
,
useNA
=
"always"
)
)
# looks OK but filter in any case using allocation code
cerDec09DTres
<-
cerDec09DT
[
cerSampleAllocDT
[
cerSampleAllocDT
$
ba_allocation_code
==
"Residential"
]]
cerDec09DTres
<-
cerDec09DTres
[
cerDec09DTres
$
Code
==
"Residential"
]
print
(
paste0
(
"Original Dec 09: "
,
length
(
cerDec09DT
$
ID
),
" records with "
,
uniqueN
(
cerDec09DT
$
ID
),
" unique IDs"
)
)
print
(
paste0
(
"Clean Dec 09: "
,
length
(
cerDec09DTres
$
ID
),
" records with "
,
uniqueN
(
cerDec09DTres
$
ID
),
" unique IDs"
)
)
# 2010
with
(
cerOct10DT
[
cerSampleAllocDT
],
table
(
Code
,
ba_allocation_code
,
useNA
=
"always"
)
)
# looks OK but filter in any case using allocation code
cerOct10DTres
<-
cerOct10DT
[
cerSampleAllocDT
[
cerSampleAllocDT
$
ba_allocation_code
==
"Residential"
]]
cerOct10DTres
<-
cerOct10DTres
[
cerOct10DTres
$
Code
==
"Residential"
]
print
(
paste0
(
"Original Oct 10: "
,
length
(
cerOct10DT
$
ID
),
" records with "
,
uniqueN
(
cerOct10DT
$
ID
),
" unique IDs"
)
)
print
(
paste0
(
"Clean Oct 10: "
,
length
(
cerOct10DTres
$
ID
),
" records with "
,
uniqueN
(
cerOct10DTres
$
ID
),
" unique IDs"
)
)
with
(
cerDec10DT
[
cerSampleAllocDT
],
table
(
Code
,
ba_allocation_code
,
useNA
=
"always"
)
)
# looks OK but filter in any case using allocation code
cerDec10DTres
<-
cerDec10DT
[
cerSampleAllocDT
[
cerSampleAllocDT
$
ba_allocation_code
==
"Residential"
]]
cerDec10DTres
<-
cerDec10DTres
[
cerDec10DTres
$
Code
==
"Residential"
]
print
(
paste0
(
"Original Dec 10: "
,
length
(
cerDec10DT
$
ID
),
" records with "
,
uniqueN
(
cerDec10DT
$
ID
),
" unique IDs"
)
)
print
(
paste0
(
"Clean Dec 10: "
,
length
(
cerDec10DTres
$
ID
),
" records with "
,
uniqueN
(
cerDec10DTres
$
ID
),
" unique IDs"
)
)
# drop tables we don't need
cerOct09DT
<-
NULL
# Check for survey matches ----
# 2009
...
...
@@ -216,28 +180,6 @@ with(cerOct09DTres,
cerOct09DTres
<-
cerOct09DTres
[
ba_allocation_code
==
"Residential"
&
baCompletedPreSurvey
==
"Pre-trial survey"
]
print
(
paste0
(
"Oct 09 IDs who both answered pre trial survey and recorded data: "
,
uniqueN
(
cerOct09DTres
$
ID
)))
cerDec09DTres
<-
cerDec09DTres
[
cerResPreSurveyDTred
]
with
(
cerDec09DTres
,
table
(
ba_allocation_code
,
baCompletedPreSurvey
,
useNA
=
"always"
)
)
cerDec09DTres
<-
cerDec09DTres
[
ba_allocation_code
==
"Residential"
&
baCompletedPreSurvey
==
"Pre-trial survey"
]
print
(
paste0
(
"Dec 09 IDs who both answered pre trial survey and recorded data: "
,
uniqueN
(
cerDec09DTres
$
ID
)))
# 2010
cerOct10DTres
<-
cerOct10DTres
[
cerResPreSurveyDTred
]
with
(
cerOct10DTres
,
table
(
ba_allocation_code
,
baCompletedPreSurvey
,
useNA
=
"always"
)
)
cerOct10DTres
<-
cerOct10DTres
[
ba_allocation_code
==
"Residential"
&
baCompletedPreSurvey
==
"Pre-trial survey"
]
print
(
paste0
(
"Oct 10 IDs who both answered pre trial survey and recorded data: "
,
uniqueN
(
cerOct10DTres
$
ID
)))
cerDec10DTres
<-
cerDec10DTres
[
cerResPreSurveyDTred
]
with
(
cerDec10DTres
,
table
(
ba_allocation_code
,
baCompletedPreSurvey
,
useNA
=
"always"
)
)
cerDec10DTres
<-
cerDec10DTres
[
ba_allocation_code
==
"Residential"
&
baCompletedPreSurvey
==
"Pre-trial survey"
]
print
(
paste0
(
"Dec 10 IDs who both answered pre trial survey and recorded data: "
,
uniqueN
(
cerDec10DTres
$
ID
)))
# Create a useful date/time in the consumption data ----
summary
(
cerOct09DTres
)
head
(
cerOct09DTres
)
...
...
@@ -247,39 +189,13 @@ cerOct09DTres$r_datetime <- as.POSIXct(cerOct09DTres$datetime_start,
# check
head
(
cerOct09DTres
)
# do the rest of them
cerDec09DTres
$
r_datetime
<-
as.POSIXct
(
cerDec09DTres
$
datetime_start
,
tz
=
""
,
"%Y-%m-%d %H:%M:%S"
)
cerOct10DTres
$
r_datetime
<-
as.POSIXct
(
cerOct10DTres
$
datetime_start
,
tz
=
""
,
"%Y-%m-%d %H:%M:%S"
)
cerDec10DTres
$
r_datetime
<-
as.POSIXct
(
cerDec10DTres
$
datetime_start
,
tz
=
""
,
"%Y-%m-%d %H:%M:%S"
)
# extract useful time elements
cerOct09DTres
$
r_date
<-
as.Date
(
cerOct09DTres
$
r_datetime
)
cerOct09DTres
$
r_year
<-
as.POSIXlt
(
cerOct09DTres
$
r_datetime
)
$
year
# since 1900
cerOct09DTres
$
r_mday
<-
as.POSIXlt
(
cerOct09DTres
$
r_datetime
)
$
mday
cerOct09DTres
$
r_wday
<-
as.POSIXlt
(
cerOct09DTres
$
r_datetime
)
$
wday
# Sunday = 0
cerOct09DTres
$
r_hour
<-
as.POSIXlt
(
cerOct09DTres
$
r_datetime
)
$
hour
cerDec09DTres
$
r_year
<-
as.POSIXlt
(
cerDec09DTres
$
r_datetime
)
$
year
# since 1900
cerDec09DTres
$
r_mday
<-
as.POSIXlt
(
cerDec09DTres
$
r_datetime
)
$
mday
cerDec09DTres
$
r_wday
<-
as.POSIXlt
(
cerDec09DTres
$
r_datetime
)
$
wday
# Sunday = 0
cerDec09DTres
$
r_hour
<-
as.POSIXlt
(
cerDec09DTres
$
r_datetime
)
$
hour
cerOct10DTres
$
r_year
<-
as.POSIXlt
(
cerOct10DTres
$
r_datetime
)
$
year
# since 1900
cerOct10DTres
$
r_mday
<-
as.POSIXlt
(
cerOct10DTres
$
r_datetime
)
$
mday
cerOct10DTres
$
r_wday
<-
as.POSIXlt
(
cerOct10DTres
$
r_datetime
)
$
wday
# Sunday = 0
cerOct10DTres
$
r_hour
<-
as.POSIXlt
(
cerOct10DTres
$
r_datetime
)
$
hour
cerDec10DTres
$
r_year
<-
as.POSIXlt
(
cerDec10DTres
$
r_datetime
)
$
year
# since 1900
cerDec09DTres
$
r_mday
<-
as.POSIXlt
(
cerDec09DTres
$
r_datetime
)
$
mday
cerDec09DTres
$
r_wday
<-
as.POSIXlt
(
cerDec09DTres
$
r_datetime
)
$
wday
# Sunday = 0
cerDec09DTres
$
r_hour
<-
as.POSIXlt
(
cerDec09DTres
$
r_datetime
)
$
hour
# create weekday, mid-week and weekend indicator
cerOct09DTres
$
weekend
<-
ifelse
(
cerOct09DTres
$
r_wday
==
0
|
cerOct09DTres
$
r_wday
==
6
,
1
,
# if weekend
...
...
@@ -298,7 +214,7 @@ table(cerOct09DTres$weekend, cerOct09DTres$r_wday, useNA = "always")
table
(
cerOct09DTres
$
weekday
,
cerOct09DTres
$
r_wday
,
useNA
=
"always"
)
table
(
cerOct09DTres
$
mid_week
,
cerOct09DTres
$
r_wday
,
useNA
=
"always"
)
# Descriptives for Tab
v
le 1 (Table 1) ----
# Descriptives for Table 1 (Table 1) ----
# Pre-trial survey completions:
uniqueN
(
cerResPreSurveyDTred
$
ID
)
# Number of households in residential data:
...
...
@@ -308,3 +224,66 @@ uniqueN(cerResPostSurveyDTred$ID)
# Number of households who completed both surveys
cerSurveysDT
<-
cerResPreSurveyDTred
[
cerResPostSurveyDTred
]
table
(
cerSurveysDT
$
baCompletedPreSurvey
,
cerSurveysDT
$
baCompletedPostSurvey
,
useNA
=
"always"
)
# Descriptive statistics for mid-week (Table 2)
# half hour level - all
describe
(
cerOct09DTres
[
mid_week
==
1
,
kWh
])
# baseload 02:00 - 05:00
describe
(
cerOct09DTres
[
mid_week
==
1
&
r_hour
>=
2
&
r_hour
<=
5
,
kWh
]
)
# evening peak 17:00 - 20:00
describe
(
cerOct09DTres
[
mid_week
==
1
&
r_hour
>=
16
&
r_hour
<=
20
,
kWh
]
)
# by heating type
cerOct09DTres
[
mid_week
==
1
,
.
(
N_hh
=
uniqueN
(
ID
),
# number of households in this joined table (survey + consumption data)
N
=
length
(
kWh
),
# n half hour records
Sum
=
sum
(
kWh
,
na.rm
=
TRUE
),
# remove NAs
Mean
=
mean
(
kWh
,
na.rm
=
TRUE
),
sd
=
sd
(
kWh
,
na.rm
=
TRUE
),
median
=
median
(
kWh
,
na.rm
=
TRUE
),
min
=
min
(
kWh
,
na.rm
=
TRUE
),
max
=
max
(
kWh
,
na.rm
=
TRUE
),
skew
=
skew
(
kWh
,
na.rm
=
TRUE
),
kurtosi
=
kurtosi
(
kWh
,
na.rm
=
TRUE
)
),
by
=
baHeat
,
][
order
(
baHeat
)]
# order results
# by number of people
cerOct09DTres
[
mid_week
==
1
,
.
(
N_hh
=
uniqueN
(
ID
),
# number of households in this joined table
N
=
length
(
kWh
),
# n half hour records
Sum
=
sum
(
kWh
,
na.rm
=
TRUE
),
# remove NAs
Mean
=
mean
(
kWh
,
na.rm
=
TRUE
),
sd
=
sd
(
kWh
,
na.rm
=
TRUE
),
median
=
median
(
kWh
,
na.rm
=
TRUE
),
min
=
min
(
kWh
,
na.rm
=
TRUE
),
max
=
max
(
kWh
,
na.rm
=
TRUE
),
skew
=
skew
(
kWh
,
na.rm
=
TRUE
),
kurtosi
=
kurtosi
(
kWh
,
na.rm
=
TRUE
)
),
by
=
baNpeople
,
][
order
(
baNpeople
)]
octSummarybyDateDT
<-
cerOct09DTres
[
mid_week
==
1
,
.
(
N
=
length
(
kWh
),
# n half hour records
Sum
=
sum
(
kWh
,
na.rm
=
TRUE
)
# remove NAs
),
by
=
.
(
ID
,
r_date
)
][
order
(
ID
,
r_date
)]
describe
(
octSummarybyDateDT
$
Sum
)
summary
(
octSummarybyDateDT
)
# test heat differences
# remember skew!
#diff_heat <- kruskal.test(kWh ~baHeat, data = cerOct09DTres[mid_week == 1], na.action = na.omit )
#summary(diff_heat)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment