From ed3f9dd035796c07c6c68e0ee3e46fa907e41ddb Mon Sep 17 00:00:00 2001 From: Ben Anderson <dataknut@icloud.com> Date: Fri, 15 Nov 2019 08:42:35 +1300 Subject: [PATCH] new vesion, uses drake; adds gxp data --- .../makeFile_GREENGridModel_v2.0.R | 253 ++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 analysis/GREENGridModel/makeFile_GREENGridModel_v2.0.R diff --git a/analysis/GREENGridModel/makeFile_GREENGridModel_v2.0.R b/analysis/GREENGridModel/makeFile_GREENGridModel_v2.0.R new file mode 100644 index 0000000..e9216a4 --- /dev/null +++ b/analysis/GREENGridModel/makeFile_GREENGridModel_v2.0.R @@ -0,0 +1,253 @@ +# loads data & runs a report + +# Load some packages +require(spatialec) + +libs <- c("GREENGridData", # data utilities + "data.table", # data munching + "drake", # data pre-loading etc + "here", # here. not there + "skimr", # skimming data for fast descriptives + "survey" # survey estimates + ) + +spatialec::loadLibraries(libs) # should install any that are missing + +spatialec::setup() # set data paths etc + +# parameters ---- + +#> GXP data ---- +gxpF <- paste0(p_Params$gxpPath, "gxpDataDT_latest.csv") + +# > IPF weights based on census 2013 ---- +ipfCensusF <- paste0(p_Params$censusPath, "/processed/2013IpfInput.csv") + +#> GREEN Grid half hourly power data ---- +# we're going to re-use the data we created for EECA +#>> whole dwelling ---- +hhTotalLoadF <- paste0(p_Params$ggPath, + "gridSpy/halfHour/extracts/halfHourImputedTotalDemand.csv.gz") + +#>> heat pump ---- +hhHPLoadF <- paste0(p_Params$ggPath, + "gridSpy/halfHour/extracts/halfHourHeatPump.csv.gz") +#>> hot water ---- +hhHWLoadF <- paste0(p_Params$ggPath, + "gridSpy/halfHour/extracts/halfHourHotWater.csv.gz") +#>> lighting ---- +hhLightingLoadF <- paste0(p_Params$ggPath, + "gridSpy/halfHour/extracts/halfHourLighting.csv.gz") + +#> GREEN Grid household survey data ---- +hhAttributesF <- paste0(p_Params$ggPath, + "survey/ggHouseholdAttributesSafe_2019-10-20.csv.gz") # latest version + +#> ipf weights data from previous run of model ---- +ipfWeightsF <- paste0(p_Params$ggPath, + "ipf/nonZeroWeightsAu2013.csv") + +# > defn of peak ---- +amPeakStart <- hms::as_hms("07:00:00") +amPeakEnd <- hms::as_hms("09:00:00") +pmPeakStart <- hms::as_hms("17:00:00") # see https://www.electrickiwi.co.nz/hour-of-power +pmPeakEnd <- hms::as_hms("21:00:00") # see https://www.electrickiwi.co.nz/hour-of-power + +# functions ---- + +loadGxpData <- function(f){ + dt <- data.table::fread(f) + # do any extra cleaning here + dt[, r_dateTime := lubridate::as_datetime(rDateTime, + tz = "Pacific/Auckland")] + dt[, year := lubridate::year(r_dateTime)] + dt <- GREENGridData::addNZSeason(dt) + return(dt) +} + +loadHalfHourPower <- function(f){ + # this assumes they have the same format + message("Loading: ", f) + dt <- data.table::fread(f) # super fast + # fix date times + dt[, r_dateTime := lubridate::as_datetime(r_dateTimeHalfHour)] + dt[, r_dateTimeNZ := lubridate::with_tz(r_dateTime, tzone = "Pacific/Auckland")] + dt[, hms := hms::as_hms(r_dateTimeNZ)] + # add season + dt <- GREENGridData::addNZSeason(dt) + return(dt) +} + + +loadDrake <- function(drakeThing){ + dt <- drake::readd(drakeThing) # breaks, why? + return(dt) +} + +makeSAETable <- function(dt, from, to){ + # creates small area profiles for the power data passed in as dt + # produces weighted means (& se) + # can set dates from & to (inclusive) + # always aagregates by season even if only a single day + # aggregates by hms in the data (which can be whatever you want) + + weights <- drake::readd(ipfWeights2013) + weights[, area := AU2013_label] + nAreas <- uniqueN(weights$area) + + #powerDT <- drake::readd(hhHPLoad) # for testing + #powerDT <- drake::readd(get(drakeObject)) + powerDT <- dt + powerDT[, date := lubridate::as_date(r_dateTimeNZ)] + nHouseholds <- uniqueN(powerDT$linkID) + + message("Making SAE table for: ", nAreas , + " areas using ", nHouseholds, " households from ", + from, " to ", to) + + data <- powerDT[date >= from & date <= to, # allow date range + .(meanW = mean(meanPowerW)), keyby = .(linkID, + season, + hms)] + + dt <- makeSvyTable(data, weights) # creates a mean by season, hms & area + return(dt) +} + +makeSvyTable <- function(data, weights){ + # assumes a variable called 'area' defines the areas you are aggregating to + # doesn't care what's in it + # aggregates to season using hms (which can be any time frame you want as it's converted to char) + # takes a long time so best to pre-run if we can in drake + # join the weights table to the data table + setkey(data, linkID) + setkey(weights, linkID) + dt <- weights[data, allow.cartesian=TRUE] + # make the result into a svydes + dt <- dt[, c_hms := as.character(hms)] # svy breaks on hms for some reason + message("Setting survey design using weights") + # the bigger the data set the longer this takes + svydes <- survey::svydesign(ids = ~linkID, # use synthetic hh_id + weights = ~ipfWeight, # hard coded + data = dt[!is.na(ipfWeight)] # make sure no NA weights + ) + # assumes we want a mean + message("Calculating weighted table") + # the bigger the dataset the longer this takes + system.time(svyt <- survey::svyby(~meanW, ~ area + c_hms + season, # hard coded + svydes, # the data + svymean, # get mean + keep.var = TRUE, + exclude=NULL, + na.action=na.pass + ) + ) + dt <- as.data.table(svyt) + # put the region label back + labelsDT <- weights[, .(nMBs = mean(nMBs)), keyby = .(area = AU2013_label, REGC2013_label)] + setkey(labelsDT, area) + setkey(dt, area) + dt <- dt[labelsDT] + dt[, hms := hms::as_hms(c_hms)] # put hms back + return(dt) +} + +loadWeights <- function(wf){ + dt <- data.table::fread(wf) + # restrict regions + dt <- dt[REGC2013_label %like% "Hawke" | REGC2013_label %like% "Taranaki"] + # reduce areas for testing + #dt <- dt[AU2013_label %like% "Park"] # selection + t <- table(dt$AU2013_label, dt$REGC2013_label) + message("Filtered areas to ", uniqueN(dt$AU2013_label), " areas from ", + uniqueN(dt$REGC2013_label), " regions") + return(dt) +} + +makeReport <- function(inF,outF){ + # default = html + rmarkdown::render(input = inF, + params = list(title = title, + subtitle = subtitle, + authors = authors), + output_file = outF + ) +} + +testFiles <- function(){ + file.info(ipfWeightsF) + file.info(ipfCensusF) + file.info(ipfSurveyF) + file.info(hhAttributesF) + file.info(hhTotalLoadF) + file.info(hhHPLoadF) + file.info(hhHWLoadF) + file.info(hhLightingLoadF) +} + +# Drake plan ---- + +#this is where we use drake if we can + +# defaults +from <- as.Date("2015-01-01") +to <- as.Date("2015-12-31") + +plan <- drake::drake_plan( + gxpData = loadGxpData(gxpF), + ipfWeights2013 = loadWeights(ipfWeightsF), + hhTotalLoad = loadHalfHourPower(hhTotalLoadF), + hhHPLoad = loadHalfHourPower(hhHPLoadF), + hhHWLoad = loadHalfHourPower(hhHWLoadF), + hhLightingLoad = loadHalfHourPower(hhLightingLoadF), + auTotalLoad = makeSAETable(dt = hhTotalLoad, from, to), # big one + auHPLoad = makeSAETable(dt = hhHPLoad, from, to), + auHWLoad = makeSAETable(dt = hhHWLoad, from, to), + auHWLoad2015_05_25 = makeSAETable(dt = hhHWLoad, + from = as.Date("2015-05-25"), + to = as.Date("2015-05-25")), # single day + auLightingLoad = makeSAETable(dt = hhLightingLoad, from, to) +) + + +# Code ---- + +# > run drake plan ---- +plan # test the plan +make(plan) # run the plan, re-loading data if needed + + +#> Load census data ---- +# will load the latest version - fast so do it here + +ipfCensusDT <- data.table::fread(ipfCensusF) + +#> Load household data ---- +# small so load here +hhAttributesDT <- data.table::fread(hhAttributesF) +# fix PV inverter +hhAttributesDT[, `PV Inverter` := ifelse(`PV Inverter` == "", "No", "Yes")] + + +# > Load IPF survey input if needed - will load the latest version ---- +# small so load here +ipfSurveyF <- paste0(p_Params$ggPath, "survey/ggIpfInput.csv") +ipfSurveyDT <- data.table::fread(ipfSurveyF) + + +# > Make report ---- +# >> yaml ---- +version <- "2.0_all_AUs" +title <- paste0("GREENGrid area unit level electricity demand model") +subtitle <- paste0("Version: ", version) +authors <- "Ben Anderson (University of Otago & University of Southampton, ben.anderson@otago.ac.nz)" + +# quick test - up to date? +testFiles() + +# >> run report ---- +inF <- paste0(p_Params$projRoot, "/analysis/GREENGridModel/GREENGridModel_v2.Rmd") +outF <- paste0(p_Params$projRoot,"/analysis/GREENGridModel/GREENGridModel_v", version, ".html") +#makeReport(inF,outF) + + -- GitLab