Skip to content
Snippets Groups Projects
Commit f6e037b6 authored by Ben Anderson's avatar Ben Anderson
Browse files

drake version (?)

parent 3f58e575
Branches
No related tags found
No related merge requests found
Showing
with 9035 additions and 0 deletions
*
!/.gitignore
# we got the power version II (run by drake)
# In this version we run and compare statistical power analysis using three different datasets:
# - Irish CER smart meter data
# - UK SAVE 'smart meter' data
# - NZ GREEN Grid 'smart meter' data
library(drake)
library(data.table)
library(here)
library(lubridate)
# parameters ----
# Locations of the three pre-processed input datasets. All paths are relative
# to the user's home directory (~), so the data must be present on the machine
# that runs the plan.

# Irish CER smart meter data
cerPath <- "~/Data/CER_Smart_Metering_Project/data/processed/"
# data before intervention
cerOctElecF <- paste0(cerPath, "CER_October_2009_Residential.csv.gz") # <- half hourly kWh consumption
cerDecElecF <- paste0(cerPath, "CER_December_2009_Residential.csv.gz") # <- half hourly kWh consumption
# UK SAVE data
savePath <- "~/Data/SAVE/processed/"
saveJanElecF <- paste0(savePath, "jan2017clamp15minuteData.csv.gz") # <- quarter hourly Wh consumption
# NZ GREEN Grid data
nzggPath <- "~/Data/NZ_GREENGrid/safe/gridSpy/1min/data/"
nzggElecF <- paste0(nzggPath, "allHouseholds_totalW_long.csv.gz") # <- per minute power
# functions ----
labelEveningPeak <- function(dt){
  # Flag each observation as falling inside or outside the evening peak.
  #
  # dt: a data.table that already holds a date-time column r_localDateTime.
  # Returns dt with a character column peakLabel, set via := on dt itself:
  #   "Evening" where the hour of r_localDateTime is 16, 17, 18 or 19
  #             (i.e. from 16:00 up to, but not including, 20:00)
  #   "Other"   everywhere else (including rows with a missing date-time)
  dt[, peakLabel := "Other"]
  dt[lubridate::hour(r_localDateTime) %in% 16:19, peakLabel := "Evening"]
  return(dt)
}
processDataGeneric <- function(dt){
  # Data processing that is common to all datasets.
  #
  # dt: a data.table with a column r_dateTime (converted here to a date-time
  #     via lubridate::as_datetime) and a column r_localDateTime (needed by
  #     labelEveningPeak, which is not created in this function).
  # Returns dt with r_dateTime converted and peakLabel added.
  dt[, r_dateTime := lubridate::as_datetime(r_dateTime)]
  dt <- labelEveningPeak(dt) # might break on NZ <-> UTC?
  # Return explicitly: ending on the assignment above would hand the result
  # back invisibly, unlike the other functions in this file.
  return(dt)
}
readData <- function(f){
  # Thin wrapper around data.table::fread so every dataset is loaded the
  # same way.
  #
  # f: path of the file to read.
  # Returns whatever data.table::fread(f) returns.
  return(data.table::fread(f))
}
getCERdata <- function(f){
  # Load one CER file and prepare it for analysis.
  #
  # f: path of the CER file (read via readData).
  # Steps: rename r_datetime -> r_localDateTime, convert it with
  # lubridate::as_datetime, then add the evening peak label.
  # Returns the processed data.table.
  cerDT <- readData(f)
  data.table::setnames(cerDT, old = "r_datetime", new = "r_localDateTime")
  cerDT[, r_localDateTime := lubridate::as_datetime(r_localDateTime)]
  return(labelEveningPeak(cerDT))
}
getSAVEdata <- function(f){
  # Load the SAVE file and aggregate it to half-hourly kWh.
  #
  # f: path of the SAVE file (read via readData).
  # Steps: rename obsDateTime -> r_localDateTime, convert it with
  # lubridate::as_datetime, sum consumptionWh within each half hour per
  # bmg_id and trialGroupNavetas (divided by 1000 to give kWh), then add
  # the evening peak label.
  # Returns a data.table keyed by bmg_id, trialGroupNavetas, r_localDateTime
  # with columns kWh and peakLabel.
  saveDT <- readData(f)
  data.table::setnames(saveDT, old = "obsDateTime", new = "r_localDateTime")
  saveDT[, r_localDateTime := lubridate::as_datetime(r_localDateTime)]
  # aggregate to half hours to match CER: the grouping expression floors each
  # timestamp to the start of its half hour
  halfHourlyDT <- saveDT[
    ,
    .(kWh = sum(consumptionWh)/1000),
    keyby = .(bmg_id,
              trialGroupNavetas,
              r_localDateTime = lubridate::floor_date(r_localDateTime,
                                                      unit = "30 minutes"))
  ]
  return(labelEveningPeak(halfHourlyDT))
}
getNZGGdata <- function(f){
  # Load the NZ GREEN Grid file and aggregate it to half-hourly kWh.
  #
  # f: path of the NZ GREEN Grid file (read via readData).
  # Steps: rename time_utc -> r_dateTime, derive the NZ local date-time,
  # average sumW within each half hour per linkID and convert to kWh, then
  # add the evening peak label.
  # Returns a data.table keyed by linkID, r_localDateTime with columns kWh
  # and peakLabel.
  dt <- readData(f)
  setnames(dt, "time_utc", "r_dateTime")
  # The source column holds UTC timestamps. Parse them as UTC first and then
  # convert to NZ time: passing tz = "Pacific/Auckland" straight to
  # as_datetime() would, for character input, treat the UTC clock times as if
  # they were already NZ local times.
  dt[, r_localDateTime := lubridate::with_tz(
    lubridate::as_datetime(r_dateTime, tz = "UTC"),
    tzone = "Pacific/Auckland"
  )]
  dt[, r_localDateTimeHalfHour := lubridate::floor_date(r_localDateTime, unit = "30 minutes")]
  # aggregate to half hours to match CER:
  # mean W / 1000 = mean kW over the half hour; divided by 2 (0.5 h) = kWh
  dt <- dt[, .(kWh = (mean(sumW)/1000)/2),
           keyby = .(linkID, r_localDateTime = r_localDateTimeHalfHour)]
  dt <- labelEveningPeak(dt)
  return(dt)
}
# check local files ----
# file.exists("drake.Rmd")
# drake plan ----
# One target per dataset plus the rendered report.
# The report paths are spliced in with !! so that drake sees literal strings:
#  - knitr_in() lets drake scan the .Rmd for readd()/loadd() calls, so the
#    report depends on (and is built after) the data targets it reads
#  - file_out() registers the rendered html as an output file of the target
thePlan <- drake::drake_plan(
  cerOctDT = getCERdata(cerOctElecF),
  cerDecDT = getCERdata(cerDecElecF),
  saveJanDT = getSAVEdata(saveJanElecF),
  nzggDT = getNZGGdata(nzggElecF),
  report = rmarkdown::render(
    input = knitr_in(!!paste0(here::here(), "/code/wgtpDrake.Rmd")),
    output_file = file_out(!!paste0(here::here(), "/output/wgtpDrake.html")),
    quiet = FALSE
  )
)
# test it ----
# show the plan, then build its configuration and draw the dependency graph
thePlan
config <- drake::drake_config(thePlan)
drake::vis_drake_graph(config)
# do it ----
# build whatever targets are out of date
drake::make(thePlan)
---
params:
authors: 'Ben Anderson, Tom Rushby, Abubakr Bahaj and Patrick James'
title: 'Statistical Power, Statistical Significance, Study Design and Decision Making'
subtitle: 'A tale of three countries...'
title: '`r paste0(params$title)`'
subtitle: '`r paste0(params$subtitle)`'
author: '`r paste0(params$authors)` (Contact: b.anderson@soton.ac.uk, `@dataknut`)'
date: 'Last run at: `r Sys.time()`'
always_allow_html: yes
output:
bookdown::html_document2:
code_folding: hide
fig_caption: yes
number_sections: yes
self_contained: no
toc: yes
toc_depth: 2
toc_float: yes
bookdown::word_document2:
fig_caption: yes
toc: yes
toc_depth: 2
bookdown::pdf_document2:
fig_caption: yes
keep_tex: yes
number_sections: yes
toc: yes
toc_depth: 2
bibliography: '`r path.expand("~/bibliography.bib")`'
---
```{r knitrSetup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE) # by default turn off code echo
```
```{r codeSetup, include=FALSE}
# Set start time ----
# kept so the Runtime section at the end can report how long the run took
startTime <- proc.time()
library(dkUtils) # utilities from devtools::install_github("dataknut/dkUtils")
# Packages needed in this .Rmd file ----
# Other packages used below are called with an explicit pkg:: prefix
# (e.g. drake, skimr, hms) and so are not listed here.
rmdLibs <- c("ggplot2", # for fancy graphs
"broom", # tidy test results
"forcats", # category manipulation
"here", # where are we
"pwr", # power stuff
"kableExtra" # for fancy kable
)
# load them
dkUtils::loadLibraries(rmdLibs)
# Local functions ---
labelProfilePlot <- function(plot){
  # Adds neat labels to the X axis of a time-of-day profile plot.
  #
  # plot: a ggplot object whose x aesthetic is a time of day.
  # Returns the plot with x axis text rotated by 90 degrees and breaks every
  # 4 hours from 00:00:00 to 24:00:00.
  #
  # The breaks are built with the hms::hms() constructor rather than the
  # deprecated hms::as.hms().
  fourHourlyBreaks <- hms::hms(hours = seq(0, 24, by = 4))
  plot <- plot +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 0.5)
    ) +
    ggplot2::scale_x_time(breaks = fourHourlyBreaks)
  return(plot)
}
# Local parameters ----
# NOTE(review): the Data section of this document prints myParams$GGDataDOI,
# which is not set here, so it renders as empty - confirm the DOI and add it.
myParams <- list(repoLoc = here::here())
myParams <- c(
  myParams,
  list(
    # child documents pulled in by the License and Acknowledgements sections
    license = paste0(myParams$repoLoc, "/includes/licenseCCBY.Rmd"),
    support = paste0(myParams$repoLoc, "/includes/supportGeneric.Rmd"),
    # publisher location used in the citation
    pubLoc = "Southampton: University of Southampton",
    # colour-blind friendly palettes from
    # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
    # with grey
    cbPalette = c("#999999", "#E69F00", "#56B4E9", "#009E73",
                  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"),
    # with black
    cbbPalette = c("#000000", "#E69F00", "#56B4E9", "#009E73",
                   "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
  )
)
```
\newpage
# About
## License
```{r license, child=myParams$license}
```
## Citation
If you wish to use any of the material from this paper please cite as:
* `r params$authors`. (`r lubridate::year(lubridate::now())`) `r params$title` (`r params$subtitle`), `r myParams$pubLoc`.
This work is (c) `r lubridate::year(lubridate::today())` the authors.
## History
Code & report history:
* [Paper history](https://github.com/dataknut/weGotThePower/commits/master)
## Data:
This report uses:
* Irish CER Smart meter electricity consumption (kWh) data for the pre-trial periods of:
* October 2009 (`Autumn`)
* December 2009 (`Winter`)
* UK SAVE household efficiency/demand response interventions trial 'smart meter' electricity consumption (kWh) data for the pre-trial period of:
* January 2017 (`Winter`)
* NZ GREEN Grid Household electricity demand (kW) data (`r myParams$GGDataDOI` [@anderson_new_2018]) for the period of:
* June - July 2015 (`Winter`)
## Acknowledgements
```{r generic support, child=myParams$support}
```
\newpage
# Introduction
This report contains the analysis for a paper of the same name. The text is stored elsewhere for ease of editing.
# Data
```{r reAdd.Data}
```
# Scenarios
* P = 0.8, p < 0.05 and effect size of 6%
# Compare power
# Statistical Annex
## CER Data
Data as loaded and processed but before any filtering or exclusions...
October:
```{r summaryOctCER}
# fetch the cached drake target and tabulate its column summaries
cerOctDT <- drake::readd(cerOctDT)
cerOctSummary <- summary(cerOctDT)
kableExtra::kable_styling(
  kableExtra::kable(cerOctSummary, caption = "Summary of CER October 2009 data")
)
```
```{r skimOctCER}
skimr::skim(cerOctDT)
```
December:
```{r summaryDecCER}
# fetch the cached drake target and tabulate its column summaries
cerDecDT <- drake::readd(cerDecDT)
cerDecSummary <- summary(cerDecDT)
kableExtra::kable_styling(
  kableExtra::kable(cerDecSummary, caption = "Summary of CER December 2009 data")
)
```
```{r skimDecCER}
skimr::skim(cerDecDT)
```
## SAVE Data
```{r summarySAVEJan}
# fetch the cached drake target and tabulate its column summaries
saveJanDT <- drake::readd(saveJanDT)
saveJanSummary <- summary(saveJanDT)
kableExtra::kable_styling(
  kableExtra::kable(saveJanSummary, caption = "Summary of SAVE January 2017 data")
)
```
```{r skimSAVEJan}
skimr::skim(saveJanDT)
```
## NZ Green Grid Data
```{r summaryNZGG}
# fetch the cached drake target and tabulate its column summaries
nzggDT <- drake::readd(nzggDT)
nzggSummary <- summary(nzggDT)
kableExtra::kable_styling(
  kableExtra::kable(nzggSummary, caption = "Summary of NZ Green Grid data")
)
```
```{r skimNZGG}
skimr::skim(nzggDT)
```
# Runtime
```{r check runtime, include=FALSE}
# third element of the proc.time() difference = elapsed (wall clock) seconds
# since startTime was recorded in the setup chunk
elapsed <- (proc.time() - startTime)[[3]]
```
Analysis completed in `r round(elapsed,2)` seconds (`r round(elapsed/60,2)` minutes) using [knitr](https://cran.r-project.org/package=knitr) in [RStudio](http://www.rstudio.com) with `r R.version.string` running on `r R.version$platform`.
# R environment
R packages used:
* base R - for the basics [@baseR]
* data.table - for fast (big) data handling [@data.table]
* ggplot2 - for slick graphics [@ggplot2]
* knitr - to create this document & neat tables [@knitr]
* lubridate - date manipulation [@lubridate]
* pwr - non-base power analysis [@pwr]
* skimr - for data skimming [@skimr]
and
* dkUtils - for local dataknut utilities :-) `devtools::install_github("dataknut/dkUtils")`
Session info:
```{r sessionInfo, echo=FALSE}
sessionInfo()
```
# References
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
File added
File added
File added
File added
File added
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment