diff --git a/paper/weGotThePowerDraftPaper.Rmd b/paper/weGotThePowerDraftPaper.Rmd index 5c57ff94bd11c2cf7a0560427fed0581380b0b18..0404965061e5b3280e90979b621865664b881d19 100644 --- a/paper/weGotThePowerDraftPaper.Rmd +++ b/paper/weGotThePowerDraftPaper.Rmd @@ -295,114 +295,72 @@ knitr::kable(caption = "Power analysis for means results table (partial)", ## Proportions -Does not require a sample. +Does not require a sample. As a relatively simple example, suppose we were interested in the adoption of heat pumps in two equal sized samples. Suppose we thought in one sample (say, home owners) we thought it might be 40% and in rental properties it would be 25% (ref BRANZ 2015). What sample size would we need to conclude a significant difference with power = 0.8 and at various p values? + +`pwr::pwr.tp.test()` (ref pwr) can give us the answer... ```{r proportionsPowerCalc} # re-use parameters from above -propPowerDT <- weGotThePower::estimateProportionEffectSizes(testSamples,testPower) +#propPowerDT <- weGotThePower::estimateProportionEffectSizes(testSamples,testPower) + +getPropN <- function(p1, p2, power = 0.8){ + # get sample n for different sig values + sigs <- c(0.01,0.05,0.1, 0.2) + nSigs <- length(sigs) + resultsDT <- data.table::data.table() + for(p in sigs){ + #print(p) + result <- pwr::pwr.2p.test(ES.h(p1, p2), power = 0.8, sig.level = p) + dt <- data.table::as.data.table(broom::tidy(result)) + #print(dt) + resultsDT <- rbind(resultsDT, dt) + } + resultsDT$props <- paste0("p1 = ", p1, " p2 = ", p2) # useful for a label + return(resultsDT) +} ``` -Figure \@ref(fig:propSampleSizeFig80) shows the initial p = 0.05 plot. This shows the difference that would be required - -```{r propSampleSizeFig80, fig.cap="Power analysis results for proportions (p = 0.05, power = 0.8)"} - -myCaption <- paste0("Source: authors' calculations","\nTest: R function pwr::pwr.2p.test, statistical power = 0.8") - -p <- makePowerPlot(propPowerDT[pValue == "p = 0.05"]) - -p <- p + labs(caption = myCaption) + - theme(legend.position="bottom") - -# add annotations -vLineAlpha <- 0.5 - -# add hline at effect size for p = 0.05, n = 1000 -p005Ref <- propPowerDT[pValue == "p = 0.05" & sampleN == 1000] # for reference line -y005 <- p005Ref$effectSize # effect size for p = 0.05, n = 1000 -x005 <- p005Ref$sampleN -p <- p + - geom_hline(yintercept = y005, colour = "red") + - geom_segment(x = x005, y = y005, xend = x005, yend = 0, alpha = vLineAlpha, - colour = cbPalette[1]) - -p <- p + - annotate(geom = "text", - x = 1800, - y = y005 + 5, - label = paste0("Difference = ", round(y005, 2) ,"% with \n p = 0.05, power = 0.8 and n = 1000"), - hjust = 0) # https://stackoverflow.com/questions/26684023/how-to-left-align-text-in-annotate-from-ggplot2 - -p +```{r propTable1} +dt <- getPropN(p1 =0.4, p2 =0.25) +knitr::kable(dt, caption = "Samples required if p1 = 40% and p2 = 25%", digits = 2) +``` -ggplot2::ggsave("figs/statPowerEsts80proportions_p0.05.png", p) +We can repeat this for other values of p1 and p2. For example, suppose both were much smaller (e.g. 10% and 15%)... Clearly we need _much_ larger samples. +```{r propTable1} +dt <- getPropN(p1 =0.1, p2 =0.15) +knitr::kable(dt, caption = "Samples required if p1 = 10% and p2 = 15%", digits = 2) ``` -Figure \@ref(fig:propSampleSizeFig80all) shows the plot for all results. -```{r propSampleSizeFig80all, fig.cap="Power analysis results (power = 0.8)"} -# rebuild for all p values -p <- makePowerPlot(propPowerDT) +The above used an arcsine transform. -p <- p + labs(caption = myCaption) + - theme(legend.position="bottom") - -# add annotations -vLineAlpha <- 0.5 +As a double check, using eqn to assess margin of error... -# add hline at effect size for p = 0.05, n = 1000 -p005Ref <- propPowerDT[pValue == "p = 0.05" & sampleN == 1000] # for reference line -y005 <- p005Ref$effectSize # effect size for p = 0.05, n = 1000 -x005 <- p005Ref$sampleN -p <- p + - geom_hline(yintercept = y005, colour = "red") + - geom_segment(x = x005, y = y005, xend = x005, yend = 0, alpha = vLineAlpha, - colour = cbPalette[2]) - -p <- p + - annotate(geom = "text", - x = 1800, - y = y005 + 5, - label = paste0("Effect size = ", round(y005, 2) ,"% with \n p = 0.05, power = 0.8 and n = 1000"), - hjust = 0) # https://stackoverflow.com/questions/26684023/how-to-left-align-text-in-annotate-from-ggplot2 +If: -# add vline at 0.01 effect size for p = 0.05, n = 1000 -p001Ref <- propPowerDT[pValue == "p = 0.01" & - effectSize < ceiling(p005Ref$effectSize) & - effectSize > floor(p005Ref$effectSize)] # for reference line -x001 <- mean(p001Ref$sampleN) -p <- p + geom_segment(x = x001, y = y005, xend = x001, yend = 0, alpha = vLineAlpha, - colour = cbPalette[1]) - -# add vline at 0.1 effect size for p = 0.05, n = 1000 -p01Ref <- propPowerDT[pValue == "p = 0.1" & - effectSize < ceiling(p005Ref$effectSize) & - effectSize > floor(p005Ref$effectSize)] # for reference line -x01 <- mean(p01Ref$sampleN) -p <- p + geom_segment(x = x01, y = y005, xend = x01, yend = 0, alpha = vLineAlpha, - colour = cbPalette[3]) + * p = 0.4 (40%) + * n = 151 + +```{r proportionErrorMargin151} +p <- 0.4 +n <- 151 +em <- qnorm(0.975) * sqrt(p*(1-p)/n) +emr <- round(em,3) +``` -# add vline at 0.2 effect size for p = 0.05, n = 1000 -p02Ref <- propPowerDT[pValue == "p = 0.2" & - effectSize < ceiling(p005Ref$effectSize) & - effectSize > floor(p005Ref$effectSize)] # for reference line -x02 <- mean(p02Ref$sampleN) -p <- p + geom_segment(x = x02, y = y005, xend = x02, yend = 0, alpha = vLineAlpha, - colour = cbPalette[4]) -p +then the margin of error = +/- `r emr` (`r 100*emr`%). So we could quote the Heat Pump uptake for owner-occupiers as 40% (+/- `r 100*emr`% [or `r 40 - 100*emr` - `r 40 + 100*emr`] with p = 0.05). -ggplot2::ggsave("figs/statPowerEsts80proportions_all.png", p) +```{r proportionErrorMargin500} +p <- 0.4 +n <- 500 +em <- qnorm(0.975) * sqrt(p*(1-p)/n) +emr <- round(em,3) ``` -Full table of results: +This may be far too wide an error margin for our purposes so we may instead have recruited 500 per sample. Now the margin of error is +/- `r emr` (`r 100*emr`%) so we can now quote the Heat Pump uptake for owner-occupiers as 40% (+/- `r 100*emr`% [or `r 40 - 100*emr` - `r 40 + 100*emr`] with p = 0.05). -```{r propPowerTable} -dt <- dcast.data.table(propPowerDT, sampleN ~ pValue) - -knitr::kable(caption = "Power analysis for proportions results table (partial)", - dt[sampleN <= 1000], - digits = 2) -``` +In much the same way as we did for means, we can calculate error margins # Testing for differences: effect sizes, confidence intervals and p values