Commit ed3f9dd0 authored by Ben Anderson

new version, uses drake; adds gxp data

parent e670ec2d
# loads data & runs a report
# Load some packages
require(spatialec)
libs <- c("GREENGridData", # data utilities
          "data.table", # data munching
          "drake", # data pre-loading etc
          "here", # here. not there
          "skimr", # skimming data for fast descriptives
          "survey" # survey estimates
)
spatialec::loadLibraries(libs) # should install any that are missing
spatialec::setup() # set data paths etc
# parameters ----
#> GXP data ----
gxpF <- paste0(p_Params$gxpPath, "gxpDataDT_latest.csv")
#> IPF weights based on census 2013 ----
ipfCensusF <- paste0(p_Params$censusPath, "/processed/2013IpfInput.csv")
#> GREEN Grid half hourly power data ----
# we're going to re-use the data we created for EECA
#>> whole dwelling ----
hhTotalLoadF <- paste0(p_Params$ggPath,
                       "gridSpy/halfHour/extracts/halfHourImputedTotalDemand.csv.gz")
#>> heat pump ----
hhHPLoadF <- paste0(p_Params$ggPath,
                    "gridSpy/halfHour/extracts/halfHourHeatPump.csv.gz")
#>> hot water ----
hhHWLoadF <- paste0(p_Params$ggPath,
                    "gridSpy/halfHour/extracts/halfHourHotWater.csv.gz")
#>> lighting ----
hhLightingLoadF <- paste0(p_Params$ggPath,
                          "gridSpy/halfHour/extracts/halfHourLighting.csv.gz")
#> GREEN Grid household survey data ----
hhAttributesF <- paste0(p_Params$ggPath,
                        "survey/ggHouseholdAttributesSafe_2019-10-20.csv.gz") # latest version
#> ipf weights data from previous run of model ----
ipfWeightsF <- paste0(p_Params$ggPath,
                      "ipf/nonZeroWeightsAu2013.csv")
#> definition of peak demand periods ----
amPeakStart <- hms::as_hms("07:00:00")
amPeakEnd <- hms::as_hms("09:00:00")
pmPeakStart <- hms::as_hms("17:00:00") # see https://www.electrickiwi.co.nz/hour-of-power
pmPeakEnd <- hms::as_hms("21:00:00") # see https://www.electrickiwi.co.nz/hour-of-power
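#> peak period flag (illustrative) ----
# a minimal sketch, not part of the original script: assuming a half hourly
# data.table with an `hms` column (as created by loadHalfHourPower() below),
# these boundaries could be used to label peak periods. addPeakPeriod() and
# the peakPeriod column are hypothetical and are not used elsewhere here.
addPeakPeriod <- function(dt){
  dt[, peakPeriod := "Off peak"]
  dt[hms >= amPeakStart & hms < amPeakEnd, peakPeriod := "AM peak"]
  dt[hms >= pmPeakStart & hms < pmPeakEnd, peakPeriod := "PM peak"]
  return(dt)
}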
# functions ----
loadGxpData <- function(f){
  dt <- data.table::fread(f)
  # do any extra cleaning here
  dt[, r_dateTime := lubridate::as_datetime(rDateTime,
                                            tz = "Pacific/Auckland")]
  dt[, year := lubridate::year(r_dateTime)]
  dt <- GREENGridData::addNZSeason(dt)
  return(dt)
}
loadHalfHourPower <- function(f){
  # this assumes all the half hourly extract files have the same format
  message("Loading: ", f)
  dt <- data.table::fread(f) # super fast
  # fix date times
  dt[, r_dateTime := lubridate::as_datetime(r_dateTimeHalfHour)]
  dt[, r_dateTimeNZ := lubridate::with_tz(r_dateTime, tzone = "Pacific/Auckland")]
  dt[, hms := hms::as_hms(r_dateTimeNZ)]
  # add season
  dt <- GREENGridData::addNZSeason(dt)
  return(dt)
}
loadDrake <- function(drakeThing){
  # readd() uses non-standard evaluation, so to pass the target name as a
  # character string we need character_only = TRUE
  dt <- drake::readd(drakeThing, character_only = TRUE)
  return(dt)
}
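# example use (illustrative), once make(plan) below has run:
# gxpDT <- loadDrake("gxpData")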
makeSAETable <- function(dt, from, to){
  # creates small area profiles for the power data passed in as dt
  # produces weighted means (& se)
  # can set dates from & to (inclusive)
  # always aggregates by season even if only a single day
  # aggregates by hms in the data (which can be whatever you want)
  weights <- drake::readd(ipfWeights2013)
  weights[, area := AU2013_label]
  nAreas <- uniqueN(weights$area)
  #powerDT <- drake::readd(hhHPLoad) # for testing
  #powerDT <- drake::readd(get(drakeObject))
  powerDT <- dt
  powerDT[, date := lubridate::as_date(r_dateTimeNZ)]
  nHouseholds <- uniqueN(powerDT$linkID)
  message("Making SAE table for: ", nAreas,
          " areas using ", nHouseholds, " households from ",
          from, " to ", to)
  data <- powerDT[date >= from & date <= to, # allow date range
                  .(meanW = mean(meanPowerW)), keyby = .(linkID,
                                                         season,
                                                         hms)]
  dt <- makeSvyTable(data, weights) # creates a mean by season, hms & area
  return(dt)
}
makeSvyTable <- function(data, weights){
  # assumes a variable called 'area' defines the areas you are aggregating to
  # doesn't care what's in it
  # aggregates to season using hms (which can be any time frame you want as it's converted to char)
  # takes a long time so best to pre-run it in drake if we can
  # join the weights table to the data table
  setkey(data, linkID)
  setkey(weights, linkID)
  dt <- weights[data, allow.cartesian = TRUE]
  # make the result into a survey design object
  dt[, c_hms := as.character(hms)] # the survey functions break on hms columns so convert to character
  message("Setting survey design using weights")
  # the bigger the data set the longer this takes
  svydes <- survey::svydesign(ids = ~linkID, # use synthetic hh_id
                              weights = ~ipfWeight, # hard coded
                              data = dt[!is.na(ipfWeight)] # make sure no NA weights
  )
  # assumes we want a mean
  message("Calculating weighted table")
  # the bigger the dataset the longer this takes
  system.time(svyt <- survey::svyby(~meanW, ~area + c_hms + season, # hard coded
                                    svydes, # the survey design
                                    svymean, # get the weighted mean
                                    keep.var = TRUE,
                                    exclude = NULL,
                                    na.action = na.pass
  )
  )
  dt <- as.data.table(svyt)
  # put the region label back
  labelsDT <- weights[, .(nMBs = mean(nMBs)), keyby = .(area = AU2013_label, REGC2013_label)]
  setkey(labelsDT, area)
  setkey(dt, area)
  dt <- dt[labelsDT]
  dt[, hms := hms::as_hms(c_hms)] # put hms back
  return(dt)
}
loadWeights <- function(wf){
  dt <- data.table::fread(wf)
  # restrict regions
  dt <- dt[REGC2013_label %like% "Hawke" | REGC2013_label %like% "Taranaki"]
  # reduce areas for testing
  #dt <- dt[AU2013_label %like% "Park"] # selection
  t <- table(dt$AU2013_label, dt$REGC2013_label) # quick check of the area x region breakdown
  message("Filtered to ", uniqueN(dt$AU2013_label), " areas from ",
          uniqueN(dt$REGC2013_label), " regions")
  return(dt)
}
makeReport <- function(inF, outF){
  # default output = html
  # title, subtitle & authors are taken from the global environment (set below)
  rmarkdown::render(input = inF,
                    params = list(title = title,
                                  subtitle = subtitle,
                                  authors = authors),
                    output_file = outF
  )
}
testFiles <- function(){
  # return file info (size, modification time) for all the input files in one table
  files <- c(ipfWeightsF, ipfCensusF, ipfSurveyF, hhAttributesF,
             hhTotalLoadF, hhHPLoadF, hhHWLoadF, hhLightingLoadF)
  file.info(files)
}
# Drake plan ----
# this is where we use drake to cache the slow steps if we can
# default date range for the small area estimates
from <- as.Date("2015-01-01")
to <- as.Date("2015-12-31")
plan <- drake::drake_plan(
  gxpData = loadGxpData(gxpF),
  ipfWeights2013 = loadWeights(ipfWeightsF),
  hhTotalLoad = loadHalfHourPower(hhTotalLoadF),
  hhHPLoad = loadHalfHourPower(hhHPLoadF),
  hhHWLoad = loadHalfHourPower(hhHWLoadF),
  hhLightingLoad = loadHalfHourPower(hhLightingLoadF),
  auTotalLoad = makeSAETable(dt = hhTotalLoad, from, to), # big one
  auHPLoad = makeSAETable(dt = hhHPLoad, from, to),
  auHWLoad = makeSAETable(dt = hhHWLoad, from, to),
  auHWLoad2015_05_25 = makeSAETable(dt = hhHWLoad,
                                    from = as.Date("2015-05-25"),
                                    to = as.Date("2015-05-25")), # single day
  auLightingLoad = makeSAETable(dt = hhLightingLoad, from, to)
)
# Code ----
#> run drake plan ----
plan # test the plan
make(plan) # run the plan, re-loading data if needed
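#> inspect results (illustrative) ----
# a minimal sketch, not part of the original script: once the plan has run,
# each weighted table can be read back from the drake cache for checking,
# e.g. the hot water profiles. auHWLoadDT is a name used only here.
auHWLoadDT <- drake::readd(auHWLoad)
skimr::skim(auHWLoadDT) # quick descriptives of the weighted table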
#> Load census data ----
# will load the latest version - fast so do it here
ipfCensusDT <- data.table::fread(ipfCensusF)
#> Load household data ----
# small so load here
hhAttributesDT <- data.table::fread(hhAttributesF)
# fix PV inverter: an empty string means no inverter
hhAttributesDT[, `PV Inverter` := ifelse(`PV Inverter` == "", "No", "Yes")]
#> Load IPF survey input if needed - will load the latest version ----
# small so load here
ipfSurveyF <- paste0(p_Params$ggPath, "survey/ggIpfInput.csv")
ipfSurveyDT <- data.table::fread(ipfSurveyF)
#> Make report ----
# >> yaml ----
version <- "2.0_all_AUs"
title <- "GREENGrid area unit level electricity demand model"
subtitle <- paste0("Version: ", version)
authors <- "Ben Anderson (University of Otago & University of Southampton, ben.anderson@otago.ac.nz)"
# quick test - up to date?
testFiles()
# >> run report ----
inF <- paste0(p_Params$projRoot, "/analysis/GREENGridModel/GREENGridModel_v2.Rmd")
outF <- paste0(p_Params$projRoot,"/analysis/GREENGridModel/GREENGridModel_v", version, ".html")
#makeReport(inF,outF)