Commit fee2eb5d authored by Ben Anderson's avatar Ben Anderson
Browse files

amendments to descriptive stats. unclear if this is latest version -...

amendments to descriptive stats. unclear if this is latest version - proportion analysis is incorrect
parent 84f5582f
......@@ -9,10 +9,6 @@ author: '`r paste0(params$author)` (Contact: b.anderson@soton.ac.uk, `@dataknut`
date: 'Last run at: `r Sys.time()`'
always_allow_html: yes
output:
bookdown::word_document2:
fig_caption: yes
toc: yes
toc_depth: 2
bookdown::html_document2:
code_folding: hide
fig_caption: yes
......@@ -21,6 +17,10 @@ output:
toc: yes
toc_depth: 2
toc_float: yes
bookdown::word_document2:
fig_caption: yes
toc: yes
toc_depth: 2
bookdown::pdf_document2:
fig_caption: yes
keep_tex: yes
......@@ -39,7 +39,7 @@ knitr::opts_chunk$set(echo = FALSE) # by default turn off code echo
startTime <- proc.time()
library(myUtils) # local utilities
library(dkUtils) # local utilities
# Packages needed in this .Rmd file ----
rmdLibs <- c("data.table", # data munching
......@@ -49,10 +49,11 @@ rmdLibs <- c("data.table", # data munching
"lubridate", # for today
"broom", # tidy test results
"dkUtils", # utilities from devtools::install_github("dataknut/dkUtils")
"forcats", # category manipulation
"knitr" # for kable
)
# load them
myUtils::loadLibraries(rmdLibs)
dkUtils::loadLibraries(rmdLibs)
# Local functions ---
labelProfilePlot <- function(plot){
......@@ -71,7 +72,8 @@ labelProfilePlot <- function(plot){
myParams <- list()
myParams$repoLoc <- dkUtils::findParentDirectory("weGotThePower")
myParams$dPath <- "~/Data/NZ_GREENGrid/safe/gridSpy/1min/dataExtracts/"
myParams$dPath <- "~/Dropbox/Work/data/nzGREENGrid/dataExtracts/"
#myParams$dPath <- "~/Data/NZ_GREENGrid/safe/gridSpy/1min/dataExtracts/"
# created from https://dx.doi.org/10.5255/UKDA-SN-853334
# using https://github.com/CfSOtago/GREENGridData/blob/master/examples/code/extractCleanGridSpy1minCircuit.R
heatPumpData <- paste0(myParams$dPath, "Heat Pump_2015-04-01_2016-03-31_observations.csv.gz")
......@@ -137,7 +139,13 @@ This report contains the analysis for a paper of the same name. The text is stor
```{r loadGgData, include =FALSE}
# Load GREEN Grid Heat Pump data
dt <- data.table::as.data.table(readr::read_csv(heatPumpData, progress = FALSE))
if(file.exists(heatPumpData)){
message("Loading: ", heatPumpData )
dt <- data.table::as.data.table(readr::read_csv(heatPumpData, progress = TRUE))
} else {
message("No such file: ", heatPumpData )
stop()
}
dt <- dt[, month := lubridate::month(r_dateTime)]
dt <- dt[, year := lubridate::year(r_dateTime)]
......@@ -152,27 +160,41 @@ dt <- dt[tmpM >= 9 & tmpM <= 11, season := "Spring"]
dt <- dt[, season := factor(season, levels = c("Spring", "Summer", "Autumn", "Winter"))]
```
```{r meansPowerCalc}
```{r dataPrep}
testDT <- dt[lubridate::hour(r_dateTime) > 15 & # 16:00 ->
lubridate::hour(r_dateTime) < 20 & # <- 20:00
lubridate::wday(r_dateTime) != 6 & # not Saturday
lubridate::wday(r_dateTime) != 7 & # not Sunday
year == 2015,
.(meanW = mean(powerW, na.rm = TRUE)), keyby = .(season, linkID)]
.(meanW = mean(powerW, na.rm = TRUE),
sdW = sd(powerW, na.rm = TRUE)), keyby = .(season, linkID)]
testTable <- testDT[, .(meanMeanW = mean(meanW),
sdMeanW = sd(meanW)), keyby = .(season)]
knitr::kable(caption = "Summary of mean consumption per household by season", testTable)
ggplot2::ggplot(testDT, aes(y = meanW, x = fct_reorder(linkID, meanW, .desc = TRUE), colour = season)) +
geom_point()
```
Observations are summarised to mean W per household during 16:00 - 20:00 on weekdays for year = 2015.
```{r meansPowerCalc}
testMean <- mean(testDT[season == "Winter"]$meanW)
testSD <- mean(testDT[season == "Winter"]$meanW)
testSamples <- seq(50,3000,50)
testPower <- 0.8
testMean <- mean(testDT[season == "Winter"]$meanW)
testSD <- sd(testDT[season == "Winter"]$meanW)
# use package function
meansPowerDT <- weGotThePower::estimateMeanEffectSizes(testMean,testSD,testSamples,testPower) # auto-produces range of p values
```
Figure \@ref(fig:ggHPSampleSizeFig80) shows the initial p = 0.05 plot.
Figure \@ref(fig:ggHPSampleSizeFig80) shows the initial p = 0.01 plot.
```{r ggHPSampleSizeFig80, fig.cap="Power analysis results (p = 0.05, power = 0.8)"}
```{r ggHPSampleSizeFig80, fig.cap="Power analysis results (p = 0.01, power = 0.8)"}
makePowerPlot <- function(dt){
p <- ggplot2::ggplot(dt, aes(x = sampleN, y = effectSize, colour = pValue)) +
......@@ -184,7 +206,7 @@ makePowerPlot <- function(dt){
guides(colour = guide_legend(title = "p value: ")) +
scale_colour_manual(values=cbPalette) + # use colour-blind friendly palette
scale_y_continuous(breaks = seq(0,max(dt$effectSize),5)) + # add detail to y scale
scale_x_continuous(breaks = seq(0,3000,200)) # add detail to x scale
scale_x_continuous(breaks = seq(0,2800,400)) # add detail to x scale
return(p)
}
......@@ -192,7 +214,7 @@ myCaption <- paste0(plotCaption, ", Winter 2015",
"\nStatistic: mean W, weekdays 16:00 - 20:00",
"\nTest: R function power.t.test, statistical power = 0.8")
p <- makePowerPlot(meansPowerDT[pValue == "p = 0.05"])
p <- makePowerPlot(meansPowerDT[pValue == "p = 0.01"])
p <- p + labs(caption = myCaption) +
theme(legend.position="bottom")
......@@ -200,30 +222,30 @@ p <- p + labs(caption = myCaption) +
# add annotations
vLineAlpha <- 0.5
# add hline at effect size for p = 0.05, n = 1000
p005Ref <- meansPowerDT[pValue == "p = 0.05" & sampleN == 1000] # for reference line
y005 <- p005Ref$effectSize # effect size for p = 0.05, n = 1000
x005 <- p005Ref$sampleN
# add hline at effect size for p = 0.01, n = 1000
p001Ref <- meansPowerDT[pValue == "p = 0.01" & sampleN == 1000] # for reference line
y001 <- p001Ref$effectSize # effect size for p = 0.01, n = 1000
x001 <- p001Ref$sampleN
p <- p +
geom_hline(yintercept = y005, colour = "red") +
geom_segment(x = x005, y = y005, xend = x005, yend = 0, alpha = vLineAlpha,
geom_hline(yintercept = y001, colour = "red") +
geom_segment(x = x001, y = y001, xend = x001, yend = 0, alpha = vLineAlpha,
colour = cbPalette[1])
p <- p +
annotate(geom = "text",
x = 1800,
y = y005 + 5,
label = paste0("Effect size = ", round(y005, 2) ,"% with \n p = 0.05, power = 0.8 and n = 1000"),
x = 1400,
y = y001 + 10,
label = paste0("Effect size = ", round(y001, 2) ,"% with \n p = 0.01, power = 0.8 and n = 1000"),
hjust = 0) # https://stackoverflow.com/questions/26684023/how-to-left-align-text-in-annotate-from-ggplot2
p
ggplot2::ggsave("figs/statPowerEsts80means_p0.05.png", p)
ggplot2::ggsave("figs/statPowerEsts80means_p0.01.png", p)
```
Effect size at n = 1000: `r round(y005, 2)`.
Effect size at n = 1000: `r round(y001, 2)`.
Figure \@ref(fig:ggHPSampleSizeFig80all) shows the plot for all results.
......@@ -233,48 +255,49 @@ p <- makePowerPlot(meansPowerDT)
p <- p + labs(caption = myCaption) +
theme(legend.position="bottom")
# add annotations
vLineAlpha <- 0.5
# add hline at effect size for p = 0.05, n = 1000
p005Ref <- meansPowerDT[pValue == "p = 0.05" & sampleN == 1000] # for reference line
y005 <- p005Ref$effectSize # effect size for p = 0.05, n = 1000
x005 <- p005Ref$sampleN
p <- p +
geom_hline(yintercept = y005, colour = "red") +
geom_segment(x = x005, y = y005, xend = x005, yend = 0, alpha = vLineAlpha,
geom_hline(yintercept = y001, colour = "red") +
geom_segment(x = x001, y = y001, xend = x001, yend = 0, alpha = vLineAlpha,
colour = cbPalette[2])
p <- p +
annotate(geom = "text",
x = 1800,
y = y005 + 5,
label = paste0("Effect size = ", round(y005, 2) ,"% with \n p = 0.05, power = 0.8 and n = 1000"),
x = 1400,
y = y001 + 10,
label = paste0("Effect size = ", round(y001, 2) ,"% with \n p = 0.01, power = 0.8 and n = 1000"),
hjust = 0) # https://stackoverflow.com/questions/26684023/how-to-left-align-text-in-annotate-from-ggplot2
# add vline at 0.01 effect size for p = 0.05, n = 1000
# add vline at 0.01 effect size for p = 0.01, n = 1000
p001Ref <- meansPowerDT[pValue == "p = 0.01" &
effectSize < ceiling(p005Ref$effectSize) &
effectSize > floor(p005Ref$effectSize)] # for reference line
effectSize < ceiling(p001Ref$effectSize) &
effectSize > floor(p001Ref$effectSize)] # for reference line
x001 <- mean(p001Ref$sampleN)
p <- p + geom_segment(x = x001, y = y005, xend = x001, yend = 0, alpha = vLineAlpha,
p <- p + geom_segment(x = x001, y = y001, xend = x001, yend = 0, alpha = vLineAlpha,
colour = cbPalette[1])
# add vline at 0.1 effect size for p = 0.05, n = 1000
# add vline at 0.05 effect size for p = 0.01, n = 1000
p005Ref <- meansPowerDT[pValue == "p = 0.05" &
effectSize < ceiling(p001Ref$effectSize) &
effectSize > floor(p001Ref$effectSize)] # for reference line
x005 <- mean(p005Ref$sampleN)
p <- p + geom_segment(x = x005, y = y001, xend = x005, yend = 0, alpha = vLineAlpha,
colour = cbPalette[2])
# add vline at 0.1 effect size for p = 0.01, n = 1000
p01Ref <- meansPowerDT[pValue == "p = 0.1" &
effectSize < ceiling(p005Ref$effectSize) &
effectSize > floor(p005Ref$effectSize)] # for reference line
effectSize < ceiling(p001Ref$effectSize) &
effectSize > floor(p001Ref$effectSize)] # for reference line
x01 <- mean(p01Ref$sampleN)
p <- p + geom_segment(x = x01, y = y005, xend = x01, yend = 0, alpha = vLineAlpha,
p <- p + geom_segment(x = x01, y = y001, xend = x01, yend = 0, alpha = vLineAlpha,
colour = cbPalette[3])
# add vline at 0.2 effect size for p = 0.05, n = 1000
# add vline at 0.2 effect size for p = 0.01, n = 1000
p02Ref <- meansPowerDT[pValue == "p = 0.2" &
effectSize < ceiling(p005Ref$effectSize) &
effectSize > floor(p005Ref$effectSize)] # for reference line
effectSize < ceiling(p001Ref$effectSize+2) &
effectSize > floor(p001Ref$effectSize-2)] # for reference line
x02 <- mean(p02Ref$sampleN)
p <- p + geom_segment(x = x02, y = y005, xend = x02, yend = 0, alpha = vLineAlpha,
p <- p + geom_segment(x = x02, y = y001, xend = x02, yend = 0, alpha = vLineAlpha,
colour = cbPalette[4])
p
......
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment