From ed3f9dd035796c07c6c68e0ee3e46fa907e41ddb Mon Sep 17 00:00:00 2001
From: Ben Anderson <dataknut@icloud.com>
Date: Fri, 15 Nov 2019 08:42:35 +1300
Subject: [PATCH] new version, uses drake; adds gxp data

---
 .../makeFile_GREENGridModel_v2.0.R            | 253 ++++++++++++++++++
 1 file changed, 253 insertions(+)
 create mode 100644 analysis/GREENGridModel/makeFile_GREENGridModel_v2.0.R

diff --git a/analysis/GREENGridModel/makeFile_GREENGridModel_v2.0.R b/analysis/GREENGridModel/makeFile_GREENGridModel_v2.0.R
new file mode 100644
index 0000000..e9216a4
--- /dev/null
+++ b/analysis/GREENGridModel/makeFile_GREENGridModel_v2.0.R
@@ -0,0 +1,253 @@
+# loads data & runs a report
+
+# Load some packages
+require(spatialec)
+
+libs <- c("GREENGridData", # data utilities
+          "data.table", # data munching
+          "drake", # data pre-loading etc
+          "here", # here. not there
+          "skimr", # skimming data for fast descriptives
+          "survey" # survey estimates
+          ) 
+
+spatialec::loadLibraries(libs) # should install any that are missing
+
+spatialec::setup() # set data paths etc
+
+# parameters ----
+
+#> GXP data ----
+gxpF <- paste0(p_Params$gxpPath, "gxpDataDT_latest.csv")
+
+#> IPF weights based on census 2013 ----
+ipfCensusF <- paste0(p_Params$censusPath, "/processed/2013IpfInput.csv")
+
+#> GREEN Grid half hourly power data ----
+# we're going to re-use the data we created for EECA
+#>> whole dwelling ----
+hhTotalLoadF <- paste0(p_Params$ggPath, 
+                       "gridSpy/halfHour/extracts/halfHourImputedTotalDemand.csv.gz")
+
+#>> heat pump ----
+hhHPLoadF <- paste0(p_Params$ggPath, 
+                    "gridSpy/halfHour/extracts/halfHourHeatPump.csv.gz")
+#>> hot water ----
+hhHWLoadF <- paste0(p_Params$ggPath, 
+                    "gridSpy/halfHour/extracts/halfHourHotWater.csv.gz")
+#>> lighting ----
+hhLightingLoadF <- paste0(p_Params$ggPath, 
+                          "gridSpy/halfHour/extracts/halfHourLighting.csv.gz")
+
+#> GREEN Grid household survey data ----
+hhAttributesF <- paste0(p_Params$ggPath,
+                        "survey/ggHouseholdAttributesSafe_2019-10-20.csv.gz") # latest version 
+
+#> ipf weights data from previous run of model ----
+ipfWeightsF <- paste0(p_Params$ggPath, 
+                      "ipf/nonZeroWeightsAu2013.csv")
+
+#> definition of peak periods ----
+amPeakStart <- hms::as_hms("07:00:00")
+amPeakEnd <- hms::as_hms("09:00:00")
+pmPeakStart <- hms::as_hms("17:00:00") # see https://www.electrickiwi.co.nz/hour-of-power
+pmPeakEnd <- hms::as_hms("21:00:00") # see https://www.electrickiwi.co.nz/hour-of-power
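+# a minimal sketch of how these parameters might be used (assuming a data.table
+# `dt` with an `hms` column, as produced by loadHalfHourPower below):
+# dt[, amPeak := hms >= amPeakStart & hms < amPeakEnd]
+# dt[, pmPeak := hms >= pmPeakStart & hms < pmPeakEnd]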
+
+# functions ----
+
+loadGxpData <- function(f){
+  dt <- data.table::fread(f)
+  # do any extra cleaning here
+  dt[, r_dateTime := lubridate::as_datetime(rDateTime,
+                                            tz = "Pacific/Auckland")]
+  dt[, year := lubridate::year(r_dateTime)]
+  dt <- GREENGridData::addNZSeason(dt)
+  return(dt)
+}
+
+loadHalfHourPower <- function(f){
+  # this assumes they have the same format
+  message("Loading: ", f)
+  dt <- data.table::fread(f) # super fast
+  # fix date times
+  dt[, r_dateTime := lubridate::as_datetime(r_dateTimeHalfHour)]
+  dt[, r_dateTimeNZ := lubridate::with_tz(r_dateTime, tzone = "Pacific/Auckland")]
+  dt[, hms := hms::as_hms(r_dateTimeNZ)]
+  # add season
+  dt <- GREENGridData::addNZSeason(dt) 
+  return(dt)
+}
+
+
+loadDrake <- function(drakeThing){
+  # readd() uses non-standard evaluation so calling it on a variable looks
+  # for a target literally named `drakeThing`; pass the target name as a
+  # string & set character_only = TRUE instead
+  dt <- drake::readd(drakeThing, character_only = TRUE)
+  return(dt)
+}
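+# example usage (assumes the drake plan below has already been run so the
+# target exists in the cache):
+# gxpDT <- loadDrake("gxpData")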
+
+makeSAETable <- function(dt, from, to){
+  # creates small area profiles for the power data passed in as dt
+  # produces weighted means (& se) 
+  # can set dates from & to (inclusive)
+  # always aggregates by season even if only a single day
+  # aggregates by hms in the data (which can be whatever you want)
+  
+  weights <- drake::readd(ipfWeights2013)
+  weights[, area := AU2013_label]
+  nAreas <- uniqueN(weights$area)
+  
+  #powerDT <- drake::readd(hhHPLoad) # for testing
+  #powerDT <- drake::readd(get(drakeObject))
+  powerDT <- dt
+  powerDT[, date := lubridate::as_date(r_dateTimeNZ)]
+  nHouseholds <- uniqueN(powerDT$linkID)
+  
+  message("Making SAE table for: ", nAreas ,
+          " areas using ", nHouseholds, " households from ",
+          from, " to ", to)
+  
+  data <- powerDT[date >= from & date <= to, # allow date range
+                  .(meanW = mean(meanPowerW)), keyby = .(linkID,
+                                                         season,
+                                                         hms)]
+  
+  dt <- makeSvyTable(data, weights) # creates a mean by season, hms & area
+  return(dt)
+}
+
+makeSvyTable <- function(data, weights){
+  # assumes a variable called 'area' defines the areas you are aggregating to
+  # doesn't care what's in it
+  # aggregates to season using hms (which can be any time frame you want as it's converted to char)
+  # takes a long time so best to pre-run if we can in drake
+  # join the weights table to the data table
+  setkey(data, linkID)
+  setkey(weights, linkID)
+  dt <- weights[data, allow.cartesian=TRUE]
+  # make the result into a survey design object
+  dt[, c_hms := as.character(hms)] # survey breaks on hms columns so convert to character first
+  message("Setting survey design using weights")
+  # the bigger the data set the longer this takes
+  svydes <- survey::svydesign(ids = ~linkID, # use synthetic hh_id
+                                    weights = ~ipfWeight, # hard coded
+                                    data = dt[!is.na(ipfWeight)] # make sure no NA weights
+  )
+  # assumes we want a mean
+  message("Calculating weighted table")
+  # the bigger the dataset the longer this takes
+  system.time(svyt <- survey::svyby(~meanW, ~ area + c_hms + season, # hard coded
+                            svydes, # the data 
+                            svymean, # get mean
+                            keep.var = TRUE,
+                            exclude=NULL, 
+                            na.action=na.pass
+                            )
+              )
+  dt <- as.data.table(svyt)
+  # put the region label back
+  labelsDT <- weights[, .(nMBs = mean(nMBs)), keyby = .(area = AU2013_label, REGC2013_label)]
+  setkey(labelsDT, area)
+  setkey(dt, area)
+  dt <- dt[labelsDT]
+  dt[, hms := hms::as_hms(c_hms)] # put hms back
+  return(dt)
+}
+
+loadWeights <- function(wf){
+  dt <- data.table::fread(wf)
+  # restrict regions
+  dt <- dt[REGC2013_label %like% "Hawke" | REGC2013_label %like% "Taranaki"]
+  # reduce areas for testing
+  #dt <- dt[AU2013_label %like% "Park"] # selection
+  t <- table(dt$AU2013_label, dt$REGC2013_label)
+  message("Filtered areas to ", uniqueN(dt$AU2013_label), " areas from ",
+          uniqueN(dt$REGC2013_label), " regions")
+  return(dt)
+}
+
+makeReport <- function(inF,outF){
+  # default = html
+  rmarkdown::render(input = inF,
+                    params = list(title = title,
+                                  subtitle = subtitle,
+                                  authors = authors),
+                    output_file = outF
+  )
+}
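+# NB: makeReport() picks up title, subtitle & authors from the global
+# environment - these are set in the yaml section below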
+
+testFiles <- function(){
+  # check the input files exist & report their sizes & modification times
+  files <- c(ipfWeightsF, ipfCensusF, ipfSurveyF, hhAttributesF,
+             hhTotalLoadF, hhHPLoadF, hhHWLoadF, hhLightingLoadF)
+  return(file.info(files))
+}
+
+# Drake plan ----
+
+# this is where we use drake to cache the slow data loading & aggregation steps
+
+# default date range (inclusive) for the SAE tables
+from <- as.Date("2015-01-01")
+to <- as.Date("2015-12-31")
+
+plan <- drake::drake_plan(
+  gxpData = loadGxpData(gxpF),
+  ipfWeights2013 = loadWeights(ipfWeightsF),
+  hhTotalLoad = loadHalfHourPower(hhTotalLoadF),
+  hhHPLoad = loadHalfHourPower(hhHPLoadF),
+  hhHWLoad = loadHalfHourPower(hhHWLoadF),
+  hhLightingLoad = loadHalfHourPower(hhLightingLoadF),
+  auTotalLoad = makeSAETable(dt = hhTotalLoad, from, to), # big one
+  auHPLoad = makeSAETable(dt = hhHPLoad, from, to),
+  auHWLoad = makeSAETable(dt = hhHWLoad, from, to),
+  auHWLoad2015_05_25 = makeSAETable(dt = hhHWLoad, 
+                                    from = as.Date("2015-05-25"),
+                                    to = as.Date("2015-05-25")), # single day
+  auLightingLoad = makeSAETable(dt = hhLightingLoad, from, to)
+)
+
+
+# Code ----
+
+#> run drake plan ----
+plan # print the plan to check it
+drake::make(plan) # run the plan, re-building targets only if needed
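+# e.g. to pull results back out of the drake cache for interactive checking
+# (a sketch - targets are those named in the plan above):
+# auTotalLoadDT <- drake::readd(auTotalLoad)
+# drake::loadd(auHWLoad) # or load straight into the global environment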
+
+
+#> Load census data ----
+# will load the latest version - fast so do it here
+
+ipfCensusDT <- data.table::fread(ipfCensusF)
+
+#> Load household data ----
+# small so load here
+hhAttributesDT <- data.table::fread(hhAttributesF)
+# recode PV Inverter so a blank entry becomes "No" & anything else "Yes"
+hhAttributesDT[, `PV Inverter` := ifelse(`PV Inverter` == "", "No", "Yes")]
+
+
+#> Load IPF survey input if needed - will load the latest version ----
+# small so load here
+ipfSurveyF <- paste0(p_Params$ggPath, "survey/ggIpfInput.csv")
+ipfSurveyDT <- data.table::fread(ipfSurveyF)
+
+
+#> Make report ----
+#>> yaml ----
+version <- "2.0_all_AUs"
+title <- "GREENGrid area unit level electricity demand model"
+subtitle <- paste0("Version: ", version)
+authors <- "Ben Anderson (University of Otago & University of Southampton, ben.anderson@otago.ac.nz)"
+
+# quick check - do the input files exist & when were they last modified?
+testFiles()
+
+#>> run report ----
+inF <- paste0(p_Params$projRoot, "/analysis/GREENGridModel/GREENGridModel_v2.Rmd")
+outF <- paste0(p_Params$projRoot,"/analysis/GREENGridModel/GREENGridModel_v", version, ".html")
+#makeReport(inF,outF)
+
+
-- 
GitLab