diff --git a/analysis/GREENGridModel/GREENGridModel_v2.Rmd b/analysis/GREENGridModel/GREENGridModel_v2.Rmd new file mode 100644 index 0000000000000000000000000000000000000000..dc526a047068fdca302a8af6cbe513bfed542a6e --- /dev/null +++ b/analysis/GREENGridModel/GREENGridModel_v2.Rmd @@ -0,0 +1,2073 @@ +--- +params: + subtitle: "" + title: "" + authors: "" +title: '`r params$title`' +subtitle: '`r params$subtitle`' +author: '`r params$authors`' +date: 'Last run at: `r dkUtils::getRunDateTime()`' +output: + bookdown::html_document2: + fig_caption: yes + code_folding: hide + number_sections: yes + toc: yes + toc_depth: 2 + toc_float: TRUE + bookdown::pdf_document2: + fig_caption: yes + number_sections: yes + bookdown::word_document2: + fig_caption: yes + number_sections: yes + toc: yes + toc_depth: 2 + fig_width: 5 +always_allow_html: yes +bibliography: '`r path.expand("~/bibliography.bib")`' +--- + +```{r setup, include=FALSE} +# change default +knitr::opts_chunk$set(echo = TRUE) + +# Log compile time: +startTime <- proc.time() + +require(spatialec) + +myPackages <- c( + "data.table", # data munching + "dkUtils", # utils - from https://github.com/dataknut/dkUtils + "ggplot2", # plots + "here", # easy path management - https://speakerdeck.com/jennybc/zen-and-the-art-of-workflow-maintenance?slide=49 + "hms", # h:m:s functions + "GREENGridData", # processing green grid data + "kableExtra", # pretty tables + "lubridate", # date time functions + "plotly", # interative plots + "skimr", # for skim + "survey", # weighted data summarising + "viridis" # colour-blind friendly palettes +) + +spatialec::loadLibraries(myPackages) # this will try to install packages it can't load + +projRoot <- here::here() + +# --- Project Settings ---- +source(paste0(projRoot, "/packageSettings.R")) # loads knitr options & assigns file paths and loads system info + +# ---- Local Settings ---- +p_Params$censusDataSource <- "http://archive.stats.govt.nz/Census/2013-census/data-tables/meshblock-dataset.aspx" + +p_Params$ggPath <- "~/Data/NZ_GREENGrid" # over-rides default for speed + +# Print system information +message("Running on ", p_Params$sysName, + " from projLoc =", projRoot) # sysName is set in spatialecSettings.R +``` + +# Report Purpose + +To use: + +* NZ Census 2013 data (from `r p_Params$censusDataSource`) and +* NZ GREENGrid household power demand data (from [@anderson_new_2018]) + +To develop initial local area estimates of temporal (half-hourly) power demand in +mesh blocks and/or unit areas in the Hawkes Bay and Taranaki areas. These areas have been chosen as they +match the original sampling sites for the NZ GREENGrid household power data. + +# Abstract + +This paper will present preliminary results from the development of a neighbourhood level temporal electricity demand model for New Zealand. The model uses a spatial microsimulation approach to combine New Zealand Census data with observed demand patterns derived from a small scale household power demand monitoring dataset to produce local area electricity demand profiles for each New Zealand area unit. The paper will describe the reproducible methods used, the results of baseline models of heat pump and lighting demand for selected areas of New Zealand and the implied need for peak demand mitigation methods that can avoid both costly local network reinforcement and increased demand for GHG-emitting generation. The paper will then discuss the results of using the model to explore the potential impact of energy efficient lighting in different areas. The paper will conclude by outlining ways in which the underlying data sources could be improved to provide a more robust and extensible modelling tool for the analysis of local infrastructure resilience. + +# Load GXP data + +This is the grid exit point data (consumption in MWh) for New Zealand. We will need to select just Taranaki & Hawke's Bay for comparison with the two regions we are simulating. + +```{r loadGxpData} +gxpDataOrigDT <- drake::readd(gxpData) +# there may be NA and we only want 2015 for comparison +table(gxpDataOrigDT$`region name`, useNA = "always") +gxpDataOrigDT[, weekdays := ifelse(day_of_week == "Sat" | + day_of_week == "Sun", + "Weekend", + "Weekday")] + +gxpSummaryDT <- gxpDataOrigDT[year == 2015, .(meankWh = mean(kWh, na.rm = TRUE), + sumkWh = sum(kWh, na.rm = TRUE), + nDays = uniqueN(rDate, na.rm = TRUE) + ), + keyby = .(hms = hms::as_hms(hms), season, + weekdays, year, + region = `region name`)] + +head(gxpSummaryDT) + +``` + +Test plot the total kWh (not mean) as differing number of gxps so mean is misleading (Hawke's Bay has fewer gxps but wight higher flow). + +```{r testGxpPlot, fig.cap="Mean daily total MWh per half-hour by season and weekday vs weekend"} +# test plot +# need to make daily +gxpSummaryDT[, dailySumkWh := sumkWh/nDays] +ggplot2::ggplot(gxpSummaryDT[region != ""], aes(x = hms, y = dailySumkWh/1000,colour = season)) + + geom_line() + + facet_grid(region ~ weekdays) + + labs(x = "Time of Day", + y = "MWh") + +``` + +Figure \@ref(fig:testGxpPlot) gives the overall shape of consumption for each region by season and weekday vs weekend for 2015. These profiles contain all consumption (although net of distibuted generation) not just residential. We use this in the energy efficiency section to assess the % difference the scenario makes. + +# Load GREEN Grid survey data + +For now just use the basic version used as IPF input. + +```{r loadGGSurvey} +file.info(ipfSurveyF) + +# we already loaded it in the make file + +message("N households: ", uniqueN(ipfSurveyDT$linkID)) + +message("N variables: ", ncol(ipfSurveyDT)) + +ggHhDT <- ipfSurveyDT # for backwards compatability + +# create categories of useful vars +ggHhDT <- ggHhDT[, presenceKids := ifelse(nKids_0 == 1, "None", "1+")] + +ggHhDT <- ggHhDT[, nBedroomsCat := ifelse(nBedrooms_1_2 == 1, "1-2", NA)] +ggHhDT <- ggHhDT[, nBedroomsCat := ifelse(nBedrooms_3 == 1, "3", nBedroomsCat)] +ggHhDT <- ggHhDT[, nBedroomsCat := ifelse(nBedrooms_4m == 1, "4", nBedroomsCat)] + +ggHhDT <- ggHhDT[, nPeopleCat := ifelse(nPeople_1 == 1, "1-2", NA)] +ggHhDT <- ggHhDT[, nPeopleCat := ifelse(nPeople_2 == 1, "2", nPeopleCat)] +ggHhDT <- ggHhDT[, nPeopleCat := ifelse(nPeople_3 == 1, "3", nPeopleCat)] +ggHhDT <- ggHhDT[, nPeopleCat := ifelse(nPeople_4m == 1, "4+", nPeopleCat)] + +# add in Location etc from hhAttributes +setkey(hhAttributesDT, linkID) +setkey(ggHhDT, linkID) +ggHhDT <- ggHhDT[hhAttributesDT[, .(linkID, Location)]] +names(ggHhDT) + +``` + +# Load IPF weights + +## Census 2013 + +These are created using the GREEN Grid sample, Census 2013 and IPF: + + * Census data processing: https://git.soton.ac.uk/ba1e12/spatialec/blob/master/dataProcessing/processCensusAu2013.Rmd + * GREEN Grid sample processing: https://git.soton.ac.uk/ba1e12/spatialec/blob/master/dataProcessing/processGreenGridSurvey.R + * IPF process: https://git.soton.ac.uk/ba1e12/spatialec/tree/master/ipf + +```{r loadIPF2013} +file.info(ipfCensusF) + +ipfWeights2013DT <- drake::readd(ipfWeights2013) + +ipfWeights2013DT[, AU2013_code := as.factor(AU2013_code )] + +st <- summary(ipfWeights2013DT) + +kableExtra::kable(st, digits = 2, + caption = "Summary of IPF 2013 weights") %>% + kable_styling() +``` + + +# Load and aggregate power demand data + + +Do not do this earlier as it takes a lot of memory. For now we're going to use: + +* Heat Pump demand +* Hot Water demand +* Lighting demand + +These have been extracted from the GREENGrid power demand data [@anderson_new_2018]. + +## Hot Water + +```{r hw.load, eval=TRUE} + +# f <- paste0(p_Params$ggPath, "/safe/gridSpy/1min/dataExtracts/Hot Water_2015-04-01_2016-03-31_observations.csv.gz") +# message("Loading: ", f) +# ggHwOrigDT <- data.table::as.data.table(readr::read_csv(f)) +ggHwOrigDT <- drake::readd(hhHWLoad) +message("N rows loaded: ", dkUtils::tidyNum(nrow(ggHwOrigDT))) +message("N households loaded: ", dkUtils::tidyNum(uniqueN(ggHwOrigDT$linkID))) + +#ggHwOrigDT$hhID <- NULL # not needed + +h <- head(ggHwOrigDT) +kableExtra::kable(h, caption = "GREENGrid Household hot water demand data: head")%>% + kable_styling() + +message("Summary of variables:") +skimr::skim(ggHwOrigDT) +``` + +Note that there are -ve power values in the above summary tables. According to [previous work](https://cfsotago.github.io/GREENGridData/gridSpy1mOutliersReport_v1.0.html) these are predominantly found in one dwelling (rf_46) due to possible incorrect instrument installation and very briefly in others for the same reason (but quickly corrected). We therefore fix the rf_46 data (see https://cfsotago.github.io/GREENGridData/gridSpy1mOutliersReport_v1.0.html#rf46) and any -ve readings before proceeding... + +You will notice this also reduces the number of circuits so that they match the number of households. We also: + + * convert the UTC date to NZ time + * set some useful time/date variables + +```{r hw.clean} +# remove negative ---- +ggHwDT <- ggHwOrigDT[circuit != "Hot Water - Controlled$4231" & meanPowerW >= 0] + + +``` + +```{r hw.summary.setup} +myCaption <- paste0("GREENGrid household power demand data from ", + min(lubridate::date(ggHwDT$r_dateTimeNZ)), " to ", + max(lubridate::date(ggHwDT$r_dateTimeNZ)), + "\n Aggregated to: 15 minute periods", + "\n Hot water: n households = ", uniqueN(ggHwDT$linkID) + ) + + +# set keys for linkage ---- +setkey(ggHhDT, linkID) +setkey(ggHwDT, linkID) +``` + +```{r hw.summary.overall} + +rectAlpha <- 0.1 +vLineAlpha <- 0.4 +vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette +myTextSize <- 3 + +addPeaks <- function(p){ + # takes a plot, assumes hms is hms, adds peak period annotations + # assumes you've set yMin & yMax already + # breaks with facet_grid, scales = "free" so you have to build seperate plots if you want to do that + # there is a complex solution (https://stackoverflow.com/questions/27898651/get-scales-range-of-facets-scales-free) but... + p <- p + annotate("rect", xmin = amPeakStart, + xmax = amPeakEnd, + ymin = yMin, ymax = yMax, + alpha = rectAlpha, fill = vLineCol) + p <- p + annotate("rect", xmin = pmPeakStart, + xmax = pmPeakEnd, + ymin = yMin, ymax = yMax, + alpha = rectAlpha, fill = vLineCol) + return(p) +} + +plotDT <- ggHhDT[, .(linkID)][ggHwDT][, .(meanW = mean(meanPowerW), # merge on linkID here as we summarise (data.table trick) + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, season)] + +p <- ggplot2::ggplot(plotDT, aes(x = hms, y = meanW/1000, colour = season)) + + geom_step() + + scale_colour_discrete(name = "Season") + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW, na.rm = TRUE) +yMax <- max(plotDT$meankW, na.rm = TRUE) +addPeaks(p) +``` + +```{r hw.summary.presenceKids} +plotDT <- ggHhDT[, .(linkID, presenceKids)][ggHwDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, presenceKids, season)] + +p <- ggplot2::ggplot(plotDT[!is.na(presenceKids)], aes(x = hms, y = meanW/1000, colour = presenceKids)) + + geom_step() + + scale_colour_discrete(name = "Presence of children") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +# need to count households per category for the actual data plotted +ggHwIDsDT <- ggHwDT[, .(nObs = .N), keyby = linkID] + +# it will change for each type of circuit +t <- ggHhDT[ggHwIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('Presence of children' = presenceKids)] + +kableExtra::kable(t, caption = "N households (Presence of children)") %>% + kable_styling() +``` + +```{r hw.summary.nBedroomsCat} +# aggregate plot over all households: nBedroomsCat ---- +plotDT <- ggHhDT[, .(linkID, nBedroomsCat)][ggHwDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, nBedroomsCat, season)] + +p <- ggplot2::ggplot(plotDT[!is.na(nBedroomsCat)], aes(x = hms, y = meanW/1000, colour = nBedroomsCat)) + + geom_step() + + facet_grid(season ~ .) + + scale_colour_discrete(name = "N bedrooms") + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +t <- ggHhDT[ggHwIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('N bedrooms' = nBedroomsCat)] + +kableExtra::kable(t, caption = "N households (N bedrooms)") %>% + kable_styling() +``` + +```{r hw.summary.nPeopleCat} +# aggregate plot over all households: nPeopleCat ---- +plotDT <- ggHhDT[, .(linkID, nPeopleCat)][ggHwDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, nPeopleCat, season)] + +p <- ggplot2::ggplot(plotDT[!is.na(nPeopleCat)], aes(x = hms, y = meanW/1000, colour = nPeopleCat)) + + geom_step() + + scale_colour_discrete(name = "N people") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (across households)", + x = "Time of day",caption = paste0(myCaption, "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +t <- ggHhDT[ggHwIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('N people' = nPeopleCat)] + +kableExtra::kable(t, caption = "N households (N people)") %>% + kable_styling() +``` + +```{r hw.summary.season.byID} +# aggregate to half-hour per hh per season +plotDT <- ggHwDT[, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + nObs = .N), keyby = .(linkID, hms, season)] + +# aggregate plot per hh +p <- ggplot2::ggplot(plotDT, aes(x = hms, y = meanW/1000, colour = linkID)) + + geom_step() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (within household)", + x = "Time of day", + caption = paste0(myCaption, + "\n Hot Water circuits, n = ", + uniqueN(ggHwDT$circuit), + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +```{r hw.season.byLocation} + +# aggregate to half-hour for overall summary plot +setkey(ggHwDT, linkID) +plotDT <- ggHhDT[, .(linkID, Location)][ggHwDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, Location)] + +# aggregate plot over all households +p <- ggplot2::ggplot(plotDT[!is.na(Location)], aes(x = hms, y = meanW/1000, colour = Location)) + + geom_step() + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Hot Water circuits: n = ", + uniqueN(ggHwDT$linkID), + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +## Heat Pump + +```{r hp.load} + +file.info(hhHPLoadF) + +ggHpOrigDT <- drake::readd(hhHPLoad) + +message(paste0("N rows loaded: ", dkUtils::tidyNum(nrow(ggHpOrigDT)))) +message(paste0("N households loaded: ", dkUtils::tidyNum(uniqueN(ggHpOrigDT$linkID)))) + +h <- head(ggHpOrigDT) +kableExtra::kable(h, caption = "GREENGrid Household heat pump demand data: head")%>% + kable_styling() + +message("Summary of variables:") +skimr::skim(ggHpOrigDT) + + +``` + +Note that there are -ve power values in the above summary tables. According to [previous work](https://cfsotago.github.io/GREENGridData/gridSpy1mOutliersReport_v1.0.html) these are predominantly found in one dwelling (rf_46) due to possible incorrect instrument installation and very briefly in others for the same reason (but quickly corrected). We therefore fix the rf_46 data (see https://cfsotago.github.io/GREENGridData/gridSpy1mOutliersReport_v1.0.html#rf46) and any -ve readings before proceeding... + +You will notice this also reduces the number of circuits so that they match the number of households. We also: + + * convert the UTC date to NZ time + * set some useful time/date variables + +```{r hp.clean} +# remove negative ---- +# rf_27 2015-08-22 10:33:00 Heat Pump$2826 27759 +ggHpDT <- ggHpOrigDT[circuit != "Heat Pumps (2x) & Power$4399" & meanPowerW >= 0 & meanPowerW != 27759] + +h <- head(ggHpOrigDT) +kableExtra::kable(h, caption = "GREENGrid Household heat pump demand data: head")%>% + kable_styling() + +message("Summary of variables:") +skimr::skim(ggHpOrigDT) +``` + +```{r hp.summary.setup} +myCaption <- paste0("GREENGrid household power demand data from ", + min(lubridate::date(ggHpDT$r_dateTimeNZ)), " to ", + max(lubridate::date(ggHpDT$r_dateTimeNZ)), + "\n Aggregated to: 15 minute periods", + "\n Heat pumps: n households = ", uniqueN(ggHpDT$linkID) + ) + + +# set keys for linkage ---- +setkey(ggHpDT, linkID) +``` + +```{r hp.summary.charts.overall} +# aggregate plot over all households ---- +plotDT <- ggHhDT[, .(linkID)][ggHpDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, season)] + +p <- ggplot2::ggplot(plotDT, aes(x = hms, y = meanW/1000, colour = season)) + + geom_step() + + scale_colour_discrete(name = "Season") + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +```{r hp.summary.charts.presenceKids} +# aggregate plot over all households: nKids ---- +plotDT <- ggHhDT[, .(linkID, presenceKids)][ggHpDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, presenceKids, season)] + +p <- ggplot2::ggplot(plotDT[!is.na(presenceKids)], aes(x = hms, y = meanW/1000, colour = presenceKids)) + + geom_step() + + scale_colour_discrete(name = "Presence of children") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Heat pumps, n = ", + uniqueN(ggHpDT$linkID), + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +# need to count households per category for the actual data plotted +ggHpIDsDT <- ggHpDT[, .(nObs = .N), keyby = linkID] + +# it will change for each type of circuit +t <- ggHhDT[ggHpIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('Presence of children' = presenceKids)] + +kableExtra::kable(t, caption = "N households (Presence of children)") %>% + kable_styling() +``` + +```{r hp.summary.charts.nBedroomsCat} +# aggregate plot over all households: nBedroomsCat ---- +plotDT <- ggHhDT[, .(linkID, nBedroomsCat)][ggHpDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, nBedroomsCat, season)] + +p <- ggplot2::ggplot(plotDT[!is.na(nBedroomsCat)], aes(x = hms, y = meanW/1000, colour = nBedroomsCat)) + + geom_step() + + scale_colour_discrete(name = "N bedrooms") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +t <- ggHhDT[ggHpIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('N bedrooms' = nBedroomsCat)] + +kableExtra::kable(t, caption = "N households (N bedrooms)") %>% + kable_styling() +``` + +```{r hw.summary.charts.nPeopleCat} +# aggregate plot over all households: nPeopleCat ---- +plotDT <- ggHhDT[, .(linkID, nPeopleCat)][ggHpDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, nPeopleCat, season)] + +p <- ggplot2::ggplot(plotDT[!is.na(nPeopleCat)], aes(x = hms, y = meanW/1000, colour = nPeopleCat)) + + geom_step() + + scale_colour_discrete(name = "N people") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +t <- ggHhDT[ggHpIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('N people' = nPeopleCat)] + +kableExtra::kable(t, caption = "N households (N people)") %>% + kable_styling() +``` + +```{r hp.summary.season.byID} +# aggregate to half-hour per hh per season +plotDT <- ggHpDT[, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + nObs = .N), keyby = .(linkID, hms, season)] + +# aggregate plot per hh +p <- ggplot2::ggplot(plotDT, aes(x = hms, y = meanW/1000, colour = linkID)) + + geom_step() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (within household)", + x = "Time of day", + caption = paste0(myCaption, + "\n Heat Pump circuits, n = ", + uniqueN(ggHpDT$circuit), + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +```{r hp.season.byLocation} + +# aggregate to half-hour for overall summary plot +setkey(ggHpDT, linkID) +plotDT <- ggHhDT[, .(linkID, Location)][ggHpDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, Location)] + +# aggregate plot over all households +p <- ggplot2::ggplot(plotDT[!is.na(Location)], aes(x = hms, y = meanW/1000, colour = Location)) + + geom_step() + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Heat Pump circuits: n = ", + uniqueN(ggHpDT$linkID), + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +## Lighting + +```{r l.load, eval=TRUE} + +file.info(hhLightingLoadF) + +ggLightingOrigDT <- drake::readd(hhLightingLoad) +message(paste0("N rows loaded: ", dkUtils::tidyNum(nrow(ggLightingOrigDT)))) +message(paste0("N households loaded: ", dkUtils::tidyNum(uniqueN(ggLightingOrigDT$linkID)))) + +h <- head(ggLightingOrigDT) +kableExtra::kable(h, caption = "GREENGrid Household lighting demand data: head")%>% + kable_styling() + +message("Summary of variables:") +skimr::skim(ggLightingOrigDT) + +``` + +Note that there are -ve power values in the above summary tables. According to [previous work](https://cfsotago.github.io/GREENGridData/gridSpy1mOutliersReport_v1.0.html) these are predominantly found in one dwelling (rf_46) due to possible incorrect instrument installation and very briefly in others for the same reason (but quickly corrected). We therefore fix the rf_46 data (see https://cfsotago.github.io/GREENGridData/gridSpy1mOutliersReport_v1.0.html#rf46) and any -ve readings before proceeding. + +As above you will notice this also reduces the number of circuits so that they match the number of households. We also: + + * convert the UTC date to NZ time + * set some useful time/date variables + +```{r l.clean} +# remove negative ---- +ggLightingDT <- ggLightingOrigDT[circuit != "Lighting$4404" & meanPowerW >= 0] + +# set seasons +ggLightingDT <- GREENGridData::addNZSeason(ggLightingDT) + +# set useful dates and times +# when loaded the r_dateTime variable is UTC, we need to change this to NZT +ggLightingDT <- ggLightingDT[, r_dateTimeNZ := lubridate::with_tz(r_dateTime, tzone = "Pacific/Auckland")] +ggLightingDT <- ggLightingDT[, time := hms::as_hms(r_dateTimeNZ)] +ggLightingDT <- ggLightingDT[, halfHour := hms::trunc_hms(time, 30 * 60)] # trunc takes us back to the start of the period +ggLightingDT <- ggLightingDT[, hms := hms::trunc_hms(time, 15 * 60)] # trunc takes us back to the start of the period + + +``` + +```{r l.summary.charts.setup} +myCaption <- paste0("GREENGrid household power demand data from ", + min(lubridate::date(ggLightingDT$r_dateTimeNZ)), " to ", + max(lubridate::date(ggLightingDT$r_dateTimeNZ)), + "\n Aggregated to: 15 minute periods", + "\n Lighting: n households = ", uniqueN(ggLightingDT$linkID) + ) + +# set keys for linkage ---- +setkey(ggHhDT, linkID) +setkey(ggLightingDT, linkID) +``` + +```{r l.summary.charts.overall} +# aggregate plot over all households ---- +plotDT <- ggHhDT[, .(linkID)][ggLightingDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, season)] + +p <- ggplot2::ggplot(plotDT, aes(x = hms, y = meanW/1000, colour = season)) + + geom_step() + + scale_colour_discrete(name = "Season") + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +```{r l.summary.charts.presenceKids} +# aggregate plot over all households: nKids ---- +plotDT <- ggHhDT[, .(linkID, presenceKids)][ggLightingDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, presenceKids, season)] + +p <- ggplot2::ggplot(plotDT[!is.na(presenceKids)], aes(x = hms, y = meanW/1000, colour = presenceKids)) + + geom_step() + + scale_colour_discrete(name = "Presence of children") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +# need to count households per category for the actual data plotted +ggLightingIDsDT <- ggLightingDT[, .(nObs = .N), keyby = linkID] + +# it will change for each type of circuit +t <- ggHhDT[ggLightingIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('Presence of children' = presenceKids)] + +kableExtra::kable(t, caption = "N households (Presence of children)") %>% + kable_styling() +``` + +```{r l.summary.charts.nBedroomsCat} +# aggregate plot over all households: nBedroomsCat ---- +plotDT <- ggHhDT[, .(linkID, nBedroomsCat)][ggLightingDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, nBedroomsCat, season)] + +p <- ggplot2::ggplot(plotDT[!is.na(nBedroomsCat)], aes(x = hms, y = meanW/1000, colour = nBedroomsCat)) + + geom_step() + + scale_colour_discrete(name = "N bedrooms") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +t <- ggHhDT[ggLightingIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('N bedrooms' = nBedroomsCat)] + +kableExtra::kable(t, caption = "N households (N bedrooms)") %>% + kable_styling() +``` + +```{r l.summary.charts.nPeopleCat} +# aggregate plot over all households: nPeopleCat ---- +plotDT <- ggHhDT[, .(linkID, nPeopleCat)][ggLightingDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, nPeopleCat, season)] + +p <- ggplot2::ggplot(plotDT[!is.na(nPeopleCat)], aes(x = hms, y = meanW/1000, colour = nPeopleCat)) + + geom_step() + + scale_colour_discrete(name = "N people") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +t <- ggHhDT[ggLightingIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('N people' = nPeopleCat)] + +kableExtra::kable(t, caption = "N households (N people)") %>% + kable_styling() +``` + + +```{r l.summary.season.byID} +# aggregate to half-hour per hh per season +plotDT <- ggLightingDT[, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + nObs = .N), keyby = .(linkID, hms, season)] + +# aggregate plot per hh +p <- ggplot2::ggplot(plotDT, aes(x = hms, y = meanW/1000, colour = linkID)) + + geom_step() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (within household)", + x = "Time of day", + caption = paste0(myCaption, + "\n Lighting circuits, n = ", + uniqueN(ggLightingDT$circuit), + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +```{r l.season.byLocation} + +# aggregate to half-hour for overall summary plot +setkey(ggHpDT, linkID) +plotDT <- ggHhDT[, .(linkID, Location)][ggLightingDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, Location)] + +# aggregate plot over all households +p <- ggplot2::ggplot(plotDT[!is.na(Location)], aes(x = hms, y = meanW/1000, colour = Location)) + + geom_step() + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Lighting circuits: n = ", + uniqueN(ggLightingDT$linkID), + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +```{r runToHere} +``` + + + +## Total household demand + +```{r total.load, eval=TRUE} + +file.info(hhTotalLoadF) + +hhTotalLoadOrigDT <- drake::readd(hhTotalLoad) +message(paste0("N rows loaded: ", dkUtils::tidyNum(nrow(hhTotalLoadOrigDT)))) +message(paste0("N households loaded: ", dkUtils::tidyNum(uniqueN(hhTotalLoadOrigDT$linkID)))) + +h <- head(hhTotalLoadOrigDT) +kableExtra::kable(h, caption = "GREENGrid Household lighting demand data: head")%>% + kable_styling() + +message("Summary of variables:") +skimr::skim(hhTotalLoadOrigDT) + +``` + +Note that there are -ve power values in the above summary tables. According to [previous work](https://cfsotago.github.io/GREENGridData/reportTotalPower_circuitsToSum_v1.1.html#4_test_0_and_-ve_power_value_incidence) these are mostly due to the presence of photovoltaic panels. However they are also found in several dwellings (14,25,26,43) due to possible incorrect instrument installation and very briefly in others for the same reason (but quickly corrected). We therefore fix the rf_46 data (see https://cfsotago.github.io/GREENGridData/gridSpy1mOutliersReport_v1.0.html#rf46) and any -ve readings before proceeding. + +As above you will notice this also reduces the number of circuits so that they match the number of households. We also: + + * convert the UTC date to NZ time + * set some useful time/date variables + +```{r total.clean} +# remove negative ---- +hhTotalLoadDT <- hhTotalLoadOrigDT[linkID != "rf_14" & + linkID != "rf_25" & + linkID != "rf_26" & + linkID != "rf_43"] + +# set seasons +hhTotalLoadDT <- GREENGridData::addNZSeason(hhTotalLoadDT) + +# set useful dates and times +# when loaded the r_dateTime variable is UTC, we need to change this to NZT +hhTotalLoadDT <- hhTotalLoadDT[, r_dateTimeNZ := lubridate::with_tz(r_dateTime, tzone = "Pacific/Auckland")] +hhTotalLoadDT <- hhTotalLoadDT[, time := hms::as.hms(r_dateTimeNZ)] +hhTotalLoadDT <- hhTotalLoadDT[, halfHour := hms::trunc_hms(time, 30 * 60)] # trunc takes us back to the start of the period +hhTotalLoadDT <- hhTotalLoadDT[, hms := hms::trunc_hms(time, 15 * 60)] # trunc takes us back to the start of the period + +message(paste0("N rows loaded: ", dkUtils::tidyNum(nrow(hhTotalLoadDT)))) +message(paste0("N households loaded: ", dkUtils::tidyNum(uniqueN(hhTotalLoadDT$linkID)))) + +message("Summary of variables (after filtering):") +skimr::skim(hhTotalLoadDT) + +``` + +```{r total.summary.charts.setup} +myCaption <- paste0("GREENGrid household power demand data from ", + min(lubridate::date(hhTotalLoadDT$r_dateTimeNZ)), " to ", + max(lubridate::date(hhTotalLoadDT$r_dateTimeNZ)), + "\n Aggregated to: 15 minute periods", + "\n Total load: n households = ", uniqueN(hhTotalLoadDT$linkID) + ) + +# set keys for linkage ---- +setkey(ggHhDT, linkID) +setkey(hhTotalLoadDT, linkID) +``` + +Present these charts by PV as well as season etc + +```{r total.summary.charts.overall} +hhAttributesDT[, pvInverter := ifelse(`PV Inverter` == "Yes", + "Has PV", + "No PV")] +setkey(hhAttributesDT,linkID) +ggHhDT <- ggHhDT[hhAttributesDT[, .(linkID, pvInverter)]] +# aggregate plot over all households ---- +plotDT <- ggHhDT[, .(linkID, pvInverter)][hhTotalLoadDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, season, pvInverter)] + +p <- ggplot2::ggplot(plotDT, aes(x = hms, y = meanW/1000, colour = season)) + + geom_step() + + facet_grid(pvInverter ~ .) + + scale_colour_discrete(name = "Season") + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +# save a plot & data for just those with PV for comparison with the time use model +dt <- plotDT[pvInverter == "No PV"] +p <- ggplot2::ggplot(dt, aes(x = hms, y = meanW/1000, colour = season)) + + geom_step() + + facet_grid(pvInverter ~ .) + + scale_colour_discrete(name = "Season") + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0("N households: ", uniqueN(hhAttributesDT[pvInverter == "No PV", .(linkID)]), + "\n Peak demand periods shaded") + ) +yMin <- min(dt$meanW/1000, na.rm = TRUE) +yMax <- max(dt$meanW/1000, na.rm = TRUE) +p <- addPeaks(p) +ggplot2::ggsave(paste0(p_Params$projRoot, "/plots/meanTotalkWProfile_noPV.png"), + plot = p) +data.table::fwrite(dt, + paste0(p_Params$projRoot, "/data/meanTotalkWProfile_noPV.csv")) +``` + +```{r total.summary.charts.presenceKids} +# aggregate plot over all households: nKids ---- +plotDT <- ggHhDT[, .(linkID, presenceKids, pvInverter)][hhTotalLoadDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, presenceKids, season, pvInverter)] + +p <- ggplot2::ggplot(plotDT[!is.na(presenceKids)], aes(x = hms, y = meanW/1000, colour = presenceKids)) + + geom_step() + + scale_colour_discrete(name = "Presence of children") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ pvInverter) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +# need to count households per category for the actual data plotted +ggLightingIDsDT <- hhTotalLoadDT[, .(nObs = .N), keyby = linkID] + +# it will change for each type of circuit +t <- ggHhDT[ggLightingIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('Presence of children' = presenceKids)] + +kableExtra::kable(t, caption = "N households (Presence of children)") %>% + kable_styling() +``` + +```{r total.summary.charts.nBedroomsCat} +# aggregate plot over all households: nBedroomsCat ---- +plotDT <- ggHhDT[, .(linkID, nBedroomsCat,pvInverter)][hhTotalLoadDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, nBedroomsCat, season,pvInverter)] + +p <- ggplot2::ggplot(plotDT[!is.na(nBedroomsCat)], aes(x = hms, y = meanW/1000, colour = nBedroomsCat)) + + geom_step() + + scale_colour_discrete(name = "N bedrooms") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ pvInverter) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +t <- ggHhDT[ggLightingIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('N bedrooms' = nBedroomsCat)] + +kableExtra::kable(t, caption = "N households (N bedrooms)") %>% + kable_styling() +``` + +```{r total.summary.charts.nPeopleCat} +# aggregate plot over all households: nPeopleCat ---- +plotDT <- ggHhDT[, .(linkID, nPeopleCat, pvInverter)][hhTotalLoadDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, nPeopleCat, season,pvInverter)] + +p <- ggplot2::ggplot(plotDT[!is.na(nPeopleCat)], aes(x = hms, y = meanW/1000, colour = nPeopleCat)) + + geom_step() + + scale_colour_discrete(name = "N people") + + #scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ pvInverter) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) + +t <- ggHhDT[ggLightingIDsDT][, .('N households' = uniqueN(linkID)), + keyby = .('N people' = nPeopleCat)] + +kableExtra::kable(t, caption = "N households (N people)") %>% + kable_styling() +``` + + +```{r total.summary.season.byID} +# aggregate to half-hour per hh per season + +plotDT <- ggHhDT[, .(linkID, pvInverter)][hhTotalLoadDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(linkID, hms, season,pvInverter)] + +# aggregate plot per hh +p <- ggplot2::ggplot(plotDT, aes(x = hms, y = meanW/1000, colour = linkID)) + + geom_step() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ pvInverter) + + labs(y = "Mean kW (within household)", + x = "Time of day", + caption = paste0(myCaption, + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +```{r total.season.byLocation} + +# aggregate to half-hour for overall summary plot + +plotDT <- ggHhDT[, .(linkID, Location, pvInverter)][hhTotalLoadDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(hms, Location, pvInverter)] + +# aggregate plot over all households +p <- ggplot2::ggplot(plotDT[!is.na(Location)], aes(x = hms, y = meanW/1000, colour = Location)) + + geom_step() + + facet_grid(pvInverter ~ .) + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + "\n Lighting circuits: n = ", + uniqueN(hhTotalLoadDT$linkID), + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +```{r runToHere} +``` + + +# Small area level power demand estimates + + +We may have filtered the areas and regions we used as input to speed things up: + +```{r whichRegions} +message("N synthetic households (NB - this is _not_ the actual number of households as it is unweighted):") +ipfWeights2013DT <- drake::readd(ipfWeights2013) + +ipfWeights2013DT[, .(nHouseholds = .N, + meanWeight = mean(ipfWeight)), keyby = .(REGC2013_label)] + +message("N synthetic households:") +ipfWeights2013DT[, .(nHouseholds = .N ), keyby = .(REGC2013_label)] +message("N AUs:") +ipfWeights2013DT[, .(nAreas = uniqueN(AU2013_label), + meanWeight = mean(ipfWeight)), by = .(REGC2013_label)] + +auLabelsDT <- ipfWeights2013DT[, .(nObs = .N), keyby = .(AU2013_label, REGC2013_label)] + +t <- table(ipfWeights2013DT$AU2013_label,ipfWeights2013DT$REGC2013_label) + +kableExtra::kable(t, caption = "Areas used") %>% + kable_styling() +``` + +## Area Units - Hot water: Specific day + +```{r hw.date.select} + +message("Which day(s) have the most active household data?") + +daysActiveDT <- ggHwDT[, .(count = uniqueN(linkID)), + keyby = .(date = lubridate::date(r_dateTimeNZ), linkID)] + +daysDT <- daysActiveDT[, .(nHHs = sum(count)), keyby = .(date)] + +ggplot2::ggplot(daysDT, aes(x = date, y = nHHs)) + + geom_line() + +setkey(daysDT, nHHs, date) # auto-sorts + +tail(daysDT) + +message("So could choose 25th May 2015 which was a Monday. But we should just ") +message ("check this is when the most survey households were active since they were used for the ipf") + +setkey(daysActiveDT, linkID) + +ggHhDT <- ggHhDT[, hasSurvey := "Has survey"] +setkey(ggHhDT, linkID) +setkey(daysActiveDT, linkID) +daysActiveDT <- ggHhDT[daysActiveDT] +daysActiveDT <- daysActiveDT[is.na(hasSurvey), hasSurvey := "No survey"] + +t <- table(daysActiveDT$linkID, daysActiveDT$hasSurvey, useNA = "always") + +kableExtra::kable(t, + caption = "GREENGrid Household hot water data - missing surveys (data = number of days of power data)")%>% + kable_styling() + +message("Looks like we will only lose 2 - re-run the date tests with these two taken out") + +daysDT <- daysActiveDT[hasSurvey == "Has survey", .(nHHs = sum(count)), keyby = .(date)] + +ggplot2::ggplot(daysDT, aes(x = date, y = nHHs)) + + geom_line() + +setkey(daysDT, nHHs, date) # auto-sorts + +tail(daysDT) + +message("We can still take Monday 25th May...") +``` + +Select Monday 25th May from the power demand data and aggregate to half-hourly mean demand for each household. Note that the original data has per minute observations so this will substantially suppress variation. + +```{r hw.date.byLocation} +filter <- lubridate::date("2015-05-25") + +myCaption <- paste0("GREENGrid household power demand data, ", filter) + +dayHwDT <- ggHwDT[lubridate::date(r_dateTimeNZ) == filter] + +message("Testing the start & end times of the subset") +min(dayHwDT$r_dateTimeNZ) +max(dayHwDT$r_dateTimeNZ) + +# aggregate to half-hour for overall summary plot +setkey(dayHwDT, linkID) +plotDT <- dayHwDT[ggHhDT[, .(linkID, Location)]][, .(meankW = mean(meanPowerW/1000), + sdW = sd(meanPowerW/1000), + sumW = sum(meanPowerW/1000), + nObs = .N), keyby = .(hms, Location)] + +# aggregate plot over all households +p <- ggplot2::ggplot(plotDT, aes(x = hms, y = meankW, colour = Location)) + + geom_step() + + #scale_y_log10() + + #scale_colour_viridis(discrete = TRUE) + + labs(y = "Mean kW (across households)", + x = "Time of day", + caption = paste0(myCaption, + ": Hot Water circuits, n = ", + uniqueN(dayHwDT$linkID), + "\n Peak demand periods shaded") + ) +yMin <- min(plotDT$meankW) +yMax <- max(plotDT$meankW) +addPeaks(p) +``` + +```{r hw.date.byID, fig.height=6} +# aggregate to half-hour per hh (re-use later, gets messy) +dayHwHhDT <- dayHwDT[, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + nObs = .N), keyby = .(linkID, hms)] + +# aggregate plot per hh +p <- ggplot2::ggplot(dayHwHhDT, aes(x = hms, y = meanW/1000, colour = linkID)) + + geom_step() + + scale_colour_viridis(discrete = TRUE) + + labs(x = "Time of Day", y = "Mean kW (within household)", + caption = paste0(myCaption, + "\n Hot Water circuits, n = ", + uniqueN(dayHwDT$linkID), + "\n Peak demand periods shaded") + ) +yMin <- min(dayHwHhDT$meanW)/1000 +yMax <- max(dayHwHhDT$meanW)/1000 +addPeaks(p) + +``` + +So now we need to link the household level half-hourly aggregate data to the synthetic households. + +```{r hw.date.linkAggToSynth} +setkey(dayHwHhDT, linkID) +message("N households with demand data: ", uniqueN(dayHwHhDT$linkID)) + +setkey(ipfWeights2013DT, linkID) +message("N unique households in synthetic census: ", uniqueN(ipfWeights2013DT$linkID)) + +dataDT <- dayHwHhDT[ipfWeights2013DT, allow.cartesian=TRUE] +message("N unique households in merged power data & synthetic census: ", uniqueN(ipfWeights2013DT$linkID)) + +message("We will have NA values for power etc for households who were in the survey but for whom we have no power data.") +message("We will have 0 values for ipfWeight for for households who are not needed in a particular areaCode.") + +summary(dataDT) + +message("We can drop both of these to save memory.") + +dataDT <- dataDT[!is.na(meanW) & ipfWeight > 0] +nrow(dataDT) + +table(dataDT$REGC2013_label) + +``` + +Now we can construct synthetic Hot Water demand profiles for each area code. We do this by taking the half-hourly mean of the mean power across all synthetic households in each area code. Note that the 'observation' is the half-hourly household mean value - so variance has already been suppressed at the household level by taking the mean across the half hour. + +Figure \@ref(fig:hw.date.AreaProfiles.Unweighted) shows the unweighted profiles. They should be identical as they are based on the same households repeatedly allocated to each region and area unit. + +```{r hw.date.AreaProfiles.Unweighted,fig.height=6, fig.cap="Uweighted hot water profile"} +# no weights +meanProfilesUnwDT <- dataDT[, .(meankW = mean(meanW/1000)), keyby = .(hms, AU2013_label, REGC2013_label)] + +p <- ggplot2::ggplot(meanProfilesUnwDT, aes(x = hms, y = meankW, colour = AU2013_label)) + + geom_line() + + scale_colour_viridis(discrete = TRUE) + + theme(legend.position="none") + # hide for clarity + facet_grid(. ~ REGC2013_label) + + labs(x = "Time of Day", y = "Mean kW", + caption = paste0(myCaption, + "\n Hot Water circuits, n households (unweighted) = ", + uniqueN(dataDT$linkID), + "\n Replicated over ",uniqueN(dataDT$AU2013_label) , " areas", + "\n Peak demand periods shaded") + ) +yMin <- min(meanProfilesUnwDT$meankW) +yMax <- max(meanProfilesUnwDT$meankW) +addPeaks(p) + +``` + +```{r hw.date.AreaProfiles.Weighted,fig.height=6} + +# with weights + +# retrieve drake object + +svytDT <- drake::readd(auHWLoad2015_05_25) + +svytDT[, AU2013_label := area] + +# add regional labels back +setkey(svytDT, AU2013_label) +setkey(auLabelsDT, AU2013_label) +svytDT <- auLabelsDT[svytDT] + +svytDT <- svytDT[, meankW := meanW/1000] # do this here so addplot can use it + +totalIpfHHs <- sum(ipfWeights2013DT$ipfWeight) # sum of weights = n hhs + +p <- ggplot2::ggplot(svytDT, aes(x = hms, y = meankW, colour = area)) + + geom_step() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(REGC2013_label ~ .) + + theme(legend.position="none") + # hide for clarity + labs(x = "Time of Day", y = "Mean kW", + caption = paste0(myCaption, + "\n Hot Water circuits, n households (base) = ", uniqueN(dataDT$linkID), + "\n n households (reweighted) = ", dkUtils::tidyNum(totalIpfHHs), + "\n Plotting ",uniqueN(svytDT$AU2013_label) , " areas", + "\n Peak demand periods shaded") + ) +yMin <- min(svytDT$meankW) +yMax <- max(svytDT$meankW) +addPeaks(p) + +plotly::ggplotly(p) + +``` + +## Area units - Hot water: Mean seasonal + +```{r hw.season.agg} +seasonHwHhDT <- ggHwDT[, .(meanW = mean(meanPowerW)), + keyby = .(season, linkID, hms)] +``` + +```{r hw.season.linkAggToSynth} +setkey(seasonHwHhDT, linkID) +message("N households with demand data: ", uniqueN(seasonHwHhDT$linkID)) + +setkey(ipfWeights2013DT, linkID) +message("N unique households in synthetic census: ", uniqueN(ipfWeights2013DT$linkID)) + +dataDT <- seasonHwHhDT[ipfWeights2013DT, allow.cartesian=TRUE] +message("N unique households in merged power data & synthetic census: ", uniqueN(ipfWeights2013DT$linkID)) + +message("We will have NA values for power etc for households who were in the survey but for whom we have no power data.") +message("We will have 0 values for ipfWeight for for households who are not needed in a particular areaCode.") + +summary(dataDT) + +message("We can drop both of these to save memory and also drop some variables.") + +dataDT <- dataDT[!is.na(meanW) | ipfWeight > 0, + .(season, linkID, AU2013_label, REGC2013_label, meanW, hms, ipfWeight)] + +summary(dataDT) + +table(dataDT$REGC2013_label) + +``` + +Unweighted area profiles - they will all line up unless a few households got dropped through 0 weights. + +```{r hw.season.AreaProfiles.Unweighted,fig.height=6} +# no weights +meanProfilesUnwDT <- dataDT[!is.na(meanW), .(meankW = mean(meanW/1000)), keyby = .(hms, AU2013_label, season, REGC2013_label)] + +p <- ggplot2::ggplot(meanProfilesUnwDT, aes(x = hms, y = meankW, colour = AU2013_label)) + + geom_line() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ REGC2013_label) + + theme(legend.position="none") + # hide for clarity + labs(x = "Time of Day", y = "Mean kW", + caption = paste0(myCaption, + "\n Hot Water circuits", + "\n n households (unweighted) = ", uniqueN(meanProfilesUnwDT$linkID), + "\n replicated over ",uniqueN(meanProfilesUnwDT$AU2013_label) , " areas", + "\n Peak demand periods shaded") + ) +yMin <- min(meanProfilesUnwDT$meankW) +yMax <- max(meanProfilesUnwDT$meankW) +addPeaks(p) + +``` + +Now look at the weighted area profiles. + +```{r hw.season.AreaProfiles.Weighted, fig.height=6} + +# with weights +svytDT <- drake::readd(auHWLoad) +svytDT[, AU2013_label := area] + +# add regional labels back +setkey(svytDT, AU2013_label) +setkey(auLabelsDT, AU2013_label) +svytDT <- auLabelsDT[svytDT] + +svytDT <- svytDT[, meankW := meanW/1000] # do this here so addplot can use it + +totalIpfHHs <- sum(ipfWeights2013DT$ipfWeight) # sum of weights = n hhs + +p <- ggplot2::ggplot(svytDT, aes(x = hms, y = meankW, colour = area)) + + geom_step() + + facet_grid(season ~ REGC2013_label) + + scale_colour_viridis(discrete = TRUE) + + theme(legend.position="none") + # hide for clarity + labs(x = "Time of Day", y = "Mean kW", + caption = paste0(myCaption, + "\n Hot Water circuits", + "\n n households (base) = ", uniqueN(dataDT$linkID), + "\n n households (reweighted) = ", dkUtils::tidyNum(totalIpfHHs), + "\n Plotting ",uniqueN(svytDT$AU2013_label) , " areas"), + "\n Peak demand periods shaded") + +yMin <- min(svytDT$meankW) +yMax <- max(svytDT$meankW) +addPeaks(p) + +plotly::ggplotly(p) + +``` + + +## Area units - Lighting: Mean seasonal + +```{r l.season.byID} + +myCaption <- paste0("GREENGrid household power demand data from ", min(ggLightingDT$r_dateTimeNZ), " to ", max(ggLightingDT$r_dateTimeNZ)) + +# aggregate to half-hour per hh per season (re-use this later on, gets messy) +plotDT <- ggLightingDT[, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + nObs = .N), keyby = .(hms, season)] + +# aggregate to half-hour per hh per season (re-use this later on, gets messy) +seasonLHhDT <- ggLightingDT[, .(meankW = mean(meanPowerW/1000), + sdW = sd(meanPowerW), + nObs = .N), keyby = .(linkID, hms, season)] + +# aggregate plot per hh +p <- ggplot2::ggplot(seasonLHhDT, aes(x = hms, y = meankW, colour = linkID)) + + geom_step() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (within household)", + caption = paste0(myCaption, + "\n Lighting circuits, n = ", uniqueN(seasonLHhDT$linkID), + "\n Peak demand periods shaded" + ) + ) + +yMin <- min(seasonLHhDT$meankW) +yMax <- max(seasonLHhDT$meankW) +addPeaks(p) + +plotly::ggplotly(p) + +``` + +```{r l.season.linkAggToSynth} +setkey(seasonLHhDT, linkID) +message("N households with demand data: ", uniqueN(seasonLHhDT$linkID)) + +setkey(ipfWeights2013DT, linkID) +message("N unique households in synthetic census: ", uniqueN(ipfWeights2013DT$linkID)) + +#dataDT <- seasonLHhDT[ipfWeights2013DT, allow.cartesian=TRUE] +dataDT <- ipfWeights2013DT[seasonLHhDT, allow.cartesian=TRUE] +message("N unique households in merged power data & synthetic census: ", uniqueN(ipfWeights2013DT$linkID)) + +message("We will have NA values for power etc for households who were in the survey but for whom we have no power data.") +message("We will have 0 values for ipfWeight for for households who are not needed in a particular areaCode.") + +summary(dataDT) + +message("We can drop both of these to save memory.") + +dataDT <- dataDT[!is.na(meankW) | ipfWeight > 0] + +summary(dataDT) + +table(dataDT$REGC2013_label) + +``` + +Construct unweighted area profiles for comparison. + +```{r l.season.AreaProfiles.Unweighted} +# no weights +meanProfilesUnwDT <- dataDT[!is.na(meankW), .(meankW = mean(meankW)), keyby = .(hms, AU2013_label, season)] + +p <- ggplot2::ggplot(meanProfilesUnwDT, aes(x = hms, y = meankW, colour = AU2013_label)) + + geom_line() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + theme(legend.position="none") + # hide for clarity + labs(x = "Time of Day", y = "Mean kW", + caption = paste0(myCaption, + "\n Lighting circuits", + "\n n households (unweighted) = ", uniqueN(dataDT$linkID), + " replicated over ",uniqueN(meanProfilesUnwDT$AU2013_label) , " areas", + "\n Peak demand periods shaded" + ) + ) +yMin <- min(meanProfilesUnwDT$meankW) +yMax <- max(meanProfilesUnwDT$meankW) +addPeaks(p) + +``` + +Now construct the weighted area profiles. + +```{r l.season.AreaProfiles.Weighted} + +# with weights + + +svytDT <- drake::readd(auLightingLoad) +svytDT[, AU2013_label := area] + +# add regional labels back +setkey(svytDT, AU2013_label) +setkey(auLabelsDT, AU2013_label) +svytDT <- auLabelsDT[svytDT] + +svytDT <- svytDT[, meankW := meanW/1000] # do this here so addplot can use it + +totalIpfHHs <- sum(ipfWeights2013DT$ipfWeight) # sum of weights = n hhs + +p <- ggplot2::ggplot(svytDT, aes(x = hms, y = meankW, colour = area)) + + geom_step() + + facet_grid(season ~ REGC2013_label) + + scale_colour_viridis(discrete = TRUE) + + theme(legend.position="none") + # hide for clarity + labs(y = "Mean kW", + x = "Time of Day", + caption = paste0(myCaption, + "\n Lighting circuits", + "\n n households (base) = ", uniqueN(dataDT$linkID), + "\n n households (reweighted) = ", dkUtils::tidyNum(totalIpfHHs), + "\n Plotting ",uniqueN(svytDT$AU2013_label) , " areas", + "\n Peak demand periods shaded" + ) + ) +yMin <- min(svytDT$meankW) +yMax <- max(svytDT$meankW) +addPeaks(p) + +plotly::ggplotly(p) + +``` + + +## Area units - Lighting: Efficiency Model + +```{r set efficiency} +# update using Carsten's figures +# - if we switched an incandescent bulb to LED what is the % W saving? +# -> 94% +# +# - if we switched a cfl bulb to LED what is the % W saving? +# -> 67% +# +# - if we switched a ELV bulb to LED what is the % W saving? +# -> 91% +# EnergyConsult PTY LTD. (2015). Residential Energy Baseline Study: New Zealand. Retrieved from http://energyrating.gov.au/sites/new.energyrating/files/documents/Report_Residential_Baseline_Study_for_New_Zealand_2000_-_2030_0_0.pdf + +iToLED <- 1-0.94 # see above +cflToLED <- 1-0.67 # see above +hToLED <- 1-0.91 # see above +``` + +This simple model adjusts each lighting power observation for each household as follows: + + * for households with 'mostly incandescent bulbs' - we assume a switch to LEDs will reduce power demand for lighting by `r 100*(1-iToLED)` %; + * for households with 'mostly energy saving - cfl bulbs' - we assume a switch to LEDs will reduce power demand for lighting by `r 100*(1-cflToLED)` %; + * for households with 'mostly Halogen' - we assume a switch to LEDs will reduce power demand for lighting by `r 100*(1-hToLED)` %; + * for households with 'mostly LEDs' and where we don't know the bulb type we assume no change; + +This is based on calculations derived from the New Zealand Residential Energy Baseline Study [@nzbaseline2015] + +```{r l.eff.model} +setkey(ggLightingDT, linkID) +setkey(ggHhDT, linkID) +setkey(hhAttributesDT, linkID) +ggHhDT <- ggHhDT[hhAttributesDT[, .(linkID, Q49_coded)]] # add the light bulb question +ggLightingDT <- ggHhDT[, .(linkID, Q49_coded, Location)][ggLightingDT] + +t <- table(ggHhDT$Q49_coded) +prop.table(t)*100 + +t <- table(ggHhDT$Q49_coded, ggHhDT$Location, useNA = "always") + +kableExtra::kable(t, caption = "Bulb type distribution (survey data)") %>% + kable_styling() + +t <- table(ggLightingDT$Q49_coded, ggLightingDT$Location, useNA = "always") +kableExtra::kable(t, caption = "nObs in lighting data by location and bulb type - note the lighting circuits we have for Taranaki are all 'mostly incandescents'") %>% + kable_styling() + +# efficiency model +ggLightingDT <- ggLightingDT[, meanPowerWefficient := meanPowerW] # if we don't know or anything else, leave as-is +ggLightingDT <- ggLightingDT[Q49_coded == "Incandescent", meanPowerWefficient := meanPowerW*iToLED] +ggLightingDT <- ggLightingDT[Q49_coded == "Energy saving - cfl", meanPowerWefficient := meanPowerW*cflToLED] +ggLightingDT <- ggLightingDT[Q49_coded == "Halogen", meanPowerWefficient := meanPowerW*hToLED] + +t <- ggLightingDT[, .(nObs = .N, + meanW = mean(meanPowerW), + meanWefficient = mean(meanPowerWefficient), + meanWSaving = mean(meanPowerWefficient - meanPowerW), + #meanWSaving_pc = 100*((meanPowerWefficient - meanPowerW)/meanPowerW), + nHouseholds = uniqueN(linkID)), keyby = .(Location, Q49_coded)] + +kableExtra::kable(t, caption = "Overall effect of efficiency model (note we only have 2 lighting circuits in Taranaki)") %>% + kable_styling() + +``` + +```{r l.eff.season.byID} + +myCaption <- paste0("GREENGrid household power demand data from ", min(ggLightingDT$r_dateTimeNZ), " to ", max(ggLightingDT$r_dateTimeNZ)) + +# aggregate to half-hour per hh per season (re-use this later on, gets messy) +plotDT <- ggLightingDT[, .(meanWeff = mean(meanPowerWefficient), + sdW = sd(meanPowerWefficient), + nObs = .N), keyby = .(hms, season)] + +# aggregate to half-hour per hh per season (re-use this later on, gets messy) +seasonLHhDT <- ggLightingDT[, .(meankWeff = mean(meanPowerWefficient/1000), + sdkWeff = sd(meanPowerWefficient/1000), + meankW = mean(meanPowerW/1000), + sdkW = sd(meanPowerW/1000), + nObs = .N), keyby = .(linkID, hms, season)] + +# aggregate plot per hh +p1 <- ggplot2::ggplot(seasonLHhDT, aes(x = hms, y = meankW, colour = linkID)) + + geom_step() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (within household)", + caption = paste0(myCaption, + "\n Lighting circuits, n = ", uniqueN(seasonLHhDT$linkID), + "\n Peak demand periods shaded" + ) + ) + +yMin <- min(seasonLHhDT$meankW) +yMax <- max(seasonLHhDT$meankW) +addPeaks(p1) + +p2 <- ggplot2::ggplot(seasonLHhDT, aes(x = hms, y = meankWeff, colour = linkID)) + + geom_step() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + labs(y = "Mean kW (within household)", + caption = paste0(myCaption, + "\n Lighting circuits, n = ", uniqueN(seasonLHhDT$linkID), + "\n Peak demand periods shaded" + ) + ) + +yMin <- min(seasonLHhDT$meankWeff) +yMax <- max(seasonLHhDT$meankWeff) +addPeaks(p2) + + +#plotly::ggplotly(p) + +``` + +```{r l.eff.season.linkAggToSynth} +setkey(seasonLHhDT, linkID) +message("N households with demand data: ", uniqueN(seasonLHhDT$linkID)) + +setkey(ipfWeights2013DT, linkID) +message("N unique households in synthetic census: ", uniqueN(ipfWeights2013DT$linkID)) + +#dataDT <- seasonLHhDT[ipfWeights2013DT, allow.cartesian=TRUE] +dataDT <- ipfWeights2013DT[seasonLHhDT, allow.cartesian=TRUE] +message("N unique households in merged power data & synthetic census: ", uniqueN(ipfWeights2013DT$linkID)) + +message("We will have NA values for power etc for households who were in the survey but for whom we have no power data.") +message("We will have 0 values for ipfWeight for for households who are not needed in a particular areaCode.") + +summary(dataDT) + +message("We can drop both of these to save memory.") + +dataEffDT <- dataDT[!is.na(meankWeff) | ipfWeight > 0] + +summary(dataEffDT) + +``` + +Construct unweighted overall mean profiles for comparison. + +```{r l.eff.season.AreaProfiles.Unweighted} +# no weights +meanProfilesUnwDT <- dataEffDT[!is.na(meankW), .( meankW= mean(meankW), + meankWeff = mean(meankWeff) + ), keyby = .(hms, season)] + +p <- ggplot2::ggplot(meanProfilesUnwDT, aes(x = hms)) + + geom_line(aes(y = meankW, colour = "Mean")) + + geom_line(aes(y = meankWeff, colour = "Mean efficient")) + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + theme(legend.position="none") + # hide for clarity + labs(x = "Time of Day", y = "Mean kW", + caption = paste0(myCaption, + "\n Lighting circuits", + "\n n households (unweighted) = ", uniqueN(dataDT$linkID), + " replicated over ",uniqueN(meanProfilesUnwDT$AU2013_label) , " areas", + "\n Peak demand periods shaded" + ) + ) +yMin <- min(meanProfilesUnwDT$meankW) +yMax <- max(meanProfilesUnwDT$meankW) +addPeaks(p) + +``` + +Now construct the weighted area profiles. + +```{r l.eff.season.AreaProfiles.Weighted} + +# with weights +# set svydesign +dataDT <- dataDT[, c_hms := as.character(hms)] # svy breaks on hms for some reason +dataDT <- dataDT[, powerkWdiff := meankWeff - meankW] # power saving +dataDT <- dataDT[, powerkWdiff_pr := (meankWeff - meankW)/meankW] # power saving proportion + +message("Setting survey design using weights") +dataDTSvyDes <- survey::svydesign(ids = ~linkID, # use synthetic hh_id + weights = ~ipfWeight, + data = dataDT[!is.na(ipfWeight)] # make sure no NA weights +) + +``` + +```{r plot_meanSaving} +# should be in drake too really +message("Calculating weighted power summaries - can take some time...") +system.time(svyt <- svyby(~powerkWdiff, ~AU2013_label+c_hms+season, dataDTSvyDes, + svymean, + keep.var = TRUE, + exclude=NULL, + na.action=na.pass + ) + ) + +svytDT <- as.data.table(svyt) # this is just so cool - autoconversion to data.table :-) +# fix the time thing +svytDT$hms <- hms::parse_hm(svytDT$c_hms) + +svytDT[, AU2013_label := area] + +# add regional labels back +setkey(svytDT, AU2013_label) +setkey(auLabelsDT, AU2013_label) +svytDT <- auLabelsDT[svytDT] + +svytDT <- svytDT[, meankW := meanW/1000] # do this here so addplot can use it + +totalIpfHHs <- sum(ipfWeights2013DT$ipfWeight) # sum of weights = n hhs + +p <- ggplot2::ggplot(svytDT, aes(x = hms, y = powerkWdiff, colour = AU2013_label)) + + geom_step() + + facet_grid(season ~ REGC2013_label) + + scale_colour_viridis(discrete = TRUE) + + theme(legend.position="none") + # hide for clarity + labs(y = "Mean kW saved", + x = "Time of Day", + caption = paste0(myCaption, + "\n Lighting circuits", + "\n n households (base) = ", uniqueN(dataDT$linkID), + "\n n households (reweighted) = ", dkUtils::tidyNum(totalIpfHHs), + "\n Plotting ",uniqueN(svytDT$AU2013_label) , " areas", + "\n Peak demand periods shaded" + ) + ) +yMin <- min(svytDT$powerkWdiff) +yMax <- max(svytDT$powerkWdiff) +addPeaks(p) + +plotly::ggplotly(p) + +``` + +We can try to plot the % saving but it will be constant as the savings are a constant %. + +```{r plot_meanPcSaving} +message("Calculating weighted power summaries - can take some time...") +system.time(svyt <- svyby(~powerkWdiff_pr, ~AU2013_label+c_hms+season, dataDTSvyDes, + svymean, + keep.var = TRUE, + exclude=NULL, + na.action=na.pass + ) + ) + +svytDT <- as.data.table(svyt) # this is just so cool - autoconversion to data.table :-) +# fix the time thing +svytDT$hms <- hms::parse_hm(svytDT$c_hms) + +setkey(svytDT, AU2013_label) + +# add regional labels back +setkey(svytDT, AU2013_label) +setkey(auLabelsDT, AU2013_label) +svytDT <- svytDT[auLabelsDT] + +svytDT <- svytDT[, powerkWdiff_pc := powerkWdiff_pr * 100] + +p <- ggplot2::ggplot(svytDT, aes(x = hms, y = powerkWdiff_pc, colour = AU2013_label)) + + geom_step() + + facet_grid(season ~ REGC2013_label) + + scale_colour_viridis(discrete = TRUE) + + theme(legend.position="none") + # hide for clarity + labs(y = "Mean % kW saved", + x = "Time of Day", + caption = paste0(myCaption, + "\n Lighting circuits", + "\n n households (base) = ", uniqueN(dataDT$linkID), + "\n n households (reweighted) = ", dkUtils::tidyNum(totalIpfHHs), + "\n Plotting ",uniqueN(svytDT$AU2013_label) , " areas", + "\n Peak demand periods shaded" + ) + ) +yMin <- min(svytDT$powerkWdiff_pc) +yMax <- max(svytDT$powerkWdiff_pc) +addPeaks(p) + +plotly::ggplotly(p) + +``` + +What we really want to do is see what % of national load this takes out. To do this we convert the total energy saved to to mean MWh and subtract from the national mean MWh consumption profile as captured by the EA grid exit point consumption data for 2015. + +```{r gxpCompare} +# so the logis is: +# we have gxp data at kWh per half-hour +# we have summed it per season/weekday/half hour and divided by the number of days +summary(gxpSummaryDT) +``` + + +## Area units - Total load: Mean seasonal + +```{r total.season.byID} + +myCaption <- paste0("GREENGrid household power demand data from ", min(hhTotalLoadDT$r_dateTimeNZ), " to ", max(hhTotalLoadDT$r_dateTimeNZ)) + +# aggregate to half-hour per hh per season (re-use this later on, gets messy) +plotDT <- ggHhDT[, .(linkID, pvInverter)][hhTotalLoadDT][, .(meanW = mean(meanPowerW), + sdW = sd(meanPowerW), + sumW = sum(meanPowerW), + nObs = .N), keyby = .(linkID, hms, season, pvInverter)] + +# aggregate to half-hour per hh per season (re-use this later on, gets messy) +seasonTotalHhDT <- ggHhDT[, .(linkID, pvInverter)][hhTotalLoadDT][, .(meankW = mean(meanPowerW/1000), + sdW = sd(meanPowerW), + nObs = .N), keyby = .(linkID, hms, pvInverter, season)] + +# aggregate plot per hh +p <- ggplot2::ggplot(seasonTotalHhDT, aes(x = hms, y = meankW, colour = linkID)) + + geom_step() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ pvInverter) + + labs(y = "Mean kW (within household)", + caption = paste0(myCaption, + "\n n Households = ", uniqueN(seasonTotalHhDT$linkID), + "\n Peak demand periods shaded" + ) + ) + +yMin <- min(seasonTotalHhDT$meankW) +yMax <- max(seasonTotalHhDT$meankW) +addPeaks(p) + +plotly::ggplotly(p) + +``` + +```{r total.season.linkAggToSynth} +setkey(seasonTotalHhDT, linkID) +message("N households with demand data: ", uniqueN(seasonTotalHhDT$linkID)) + +setkey(ipfWeights2013DT, linkID) +message("N unique households in synthetic census: ", uniqueN(ipfWeights2013DT$linkID)) + +#dataDT <- seasonLHhDT[ipfWeights2013DT, allow.cartesian=TRUE] +dataDT <- ipfWeights2013DT[seasonTotalHhDT, allow.cartesian=TRUE] +message("N unique households in merged power data & synthetic census: ", uniqueN(ipfWeights2013DT$linkID)) + +message("We will have NA values for power etc for households who were in the survey but for whom we have no power data.") +message("We will have 0 values for ipfWeight for for households who are not needed in a particular areaCode.") + +summary(dataDT) + +message("We can drop both of these to save memory.") + +dataDT <- dataDT[!is.na(meankW) | ipfWeight > 0] + +summary(dataDT) + +table(dataDT$REGC2013_label) + +``` + +Construct unweighted area profiles for comparison. + +```{r total.season.AreaProfiles.Unweighted} +# no weights +meanProfilesUnwDT <- dataDT[!is.na(meankW), .(meankW = mean(meankW)), keyby = .(hms, AU2013_label, season)] + +p <- ggplot2::ggplot(meanProfilesUnwDT, aes(x = hms, y = meankW, colour = AU2013_label)) + + geom_line() + + scale_colour_viridis(discrete = TRUE) + + facet_grid(season ~ .) + + theme(legend.position="none") + # hide for clarity + labs(x = "Time of Day", y = "Mean kW", + caption = paste0(myCaption, + "\n Total load", + "\n n households (unweighted) = ", uniqueN(dataDT$linkID), + " replicated over ",uniqueN(meanProfilesUnwDT$AU2013_label) , " areas", + "\n Peak demand periods shaded" + ) + ) +yMin <- min(meanProfilesUnwDT$meankW) +yMax <- max(meanProfilesUnwDT$meankW) +addPeaks(p) + +``` + +Now construct the weighted area profiles. + +```{r total.season.AreaProfiles.Weighted} +# with weights + +svytDT <- drake::readd(auTotalLoad) + +svytDT[, AU2013_label := area] + +# add regional labels back +setkey(svytDT, AU2013_label) +setkey(auLabelsDT, AU2013_label) +svytDT <- auLabelsDT[svytDT] + +svytDT <- svytDT[, meankW := meanW/1000] # do this here so addplot can use it + +p <- ggplot2::ggplot(svytDT, aes(x = hms, y = meanW, colour = area)) + + geom_step() + + facet_grid(season ~ REGC2013_label) + + scale_colour_viridis(discrete = TRUE) + + theme(legend.position="none") + # hide for clarity + labs(y = "Mean kW", + x = "Time of Day", + caption = paste0(myCaption, + "\n Total load", + "\n n households (base) = ", uniqueN(dataDT$linkID), + "\n n households (reweighted) = ", dkUtils::tidyNum(totalIpfHHs), + "\n Plotting ",uniqueN(svytDT$area) , " areas", + "\n Peak demand periods shaded" + ) + ) +yMin <- min(svytDT$meankW) +yMax <- max(svytDT$meankW) +addPeaks(p) + +plotly::ggplotly(p) + +``` + + +# Acknowledgements + +```{r generic.Ack, child=p_Params$ackLong} +# generic acks +``` + +# About + + +```{r check.runtime} +t <- proc.time() - startTime + +elapsed <- t[[3]] +``` + + +```{r generic.About, child=p_Params$spatialecAbout} +# generic about include +``` + +Analysis completed in `r elapsed` seconds ( `r round(elapsed/60,2)` minutes) using [knitr](https://cran.r-project.org/package=knitr) in [RStudio](http://www.rstudio.com) with `r R.version.string` running on `r R.version$platform`. + +Additional R packages used in this report (`r myPackages`): + + * data.table - for fast data munching [@data.table] + * dkUtils - utilities [@dkUtils] + * ggplot2 - for slick graphics [@ggplot2] + * GREENGridData - manipulating GREENGrid demand data [@GREENGridData] + * hms - HH:MM:SS conversions [@hms] + * ipfp - for IPF process [@ipfp] + * kableExtra - for pretty tables [@kableExtra] + * knitr - to create this document [@knitr] + * lubridate - date and time conversions [@lubridate] + * plotly - for interactive graphics [@plotly] + * readr - for data loading [@readr] + * skimr - for data summaries [@skimr] + * survey - for weighted data summaries [@survey16] + * viridis - for color palettes [@viridis] + +# Annexes + +## Green Grid Survey data + +```{r skimGgSurveyData} +if(exists("ggHhDT")){ + skimr::skim(ggHhDT) +} +``` + +## Green Grid Power data + +### Lighting + +```{r skimGgLightingData} +if(exists("ggLightingOrigDT")){ + h <- head(ggLightingOrigDT) +kableExtra::kable(h, caption = "GREENGrid Household lighting demand data: original") %>% + kable_styling() + skimr::skim(ggLightingOrigDT) +} +``` + +### Heat Pump + +```{r skimGgHpData} +if(exists("ggHpOrigDT")){ + h <- head(ggHpOrigDT) +kableExtra::kable(h, caption = "GREENGrid Household heat pump demand data: original") %>% + kable_styling() + skimr::skim(ggHpOrigDT) +} +``` + +### Hot water + +```{r skimGgHwData} +if(exists("ggHwOrigDT")){ + h <- head(ggHwOrigDT) +kableExtra::kable(h, caption = "GREENGrid Household hot water demand data: original") %>% + kable_styling() + skimr::skim(ggHwOrigDT) +} +``` + +## Area Unit Variables + +```{r listAreaUnitVariables} +if(exists("au2013DT")){ + t <- table(au2013DT$variable) +kableExtra::kable(t, caption = "Area units: available variables") %>% + kable_styling() +} +``` + +# References \ No newline at end of file