diff --git a/greenGridr/R/processGridSpyFiles.R b/greenGridr/R/processGridSpyFiles.R index eb3c6f5b2872652ba21be0a9ae08a994dca5d4cf..bbc3d3098ac9b8e908de41011022eb57f7b4a099 100644 --- a/greenGridr/R/processGridSpyFiles.R +++ b/greenGridr/R/processGridSpyFiles.R @@ -11,67 +11,9 @@ #' @export #' list1mGridSpyFiles <- function(fpath, pattern){ - # fpath <- "/Volumes/hum-csafe/Research Projects/GREEN Grid/_RAW DATA/GridSpyData" - # pattern <- "*at1.csv$" print(paste0("Looking for files matching ", pattern, " in ", fpath)) dt <- as.data.table(list.files(path = fpath, pattern = pattern, # use the pattern to filter e.g. 1m from 30s files recursive = TRUE)) - print("First 6 rows of list:") - print(head(dt)) - print("Processing file list") - dt <- dt[, - c("hhID","fileName") := tstrsplit(V1, "/") # get actual household id & filename - ] - dt <- dt[, - fullPath := paste0(fpath,"/",V1) # get full path - ] - print(paste0("Found ", tidyNum(nrow(dt)), " files from ", uniqueN(dt$hhID), " households.")) - return(dt[, .(hhID, fullPath)]) + return(dt) } -#' Load all available 1 minute grid spy data files from the list returned by list1mGridSpyFiles -#' -#' \code{process1mGridSpyFiles}. Loads, processes ans saves 1 minute grid spy files by iterating over each household id. We we cannot just concatinate all the files since the -#' column headings (circuit labels) vary. The function saves out 1 data file per household ID per month as a gzipped .csv file. -#' @param dt a data table with 2 columns: hhID and fullPath derived from list1mGridSpyFiles() -#' -#' @author Ben Anderson, \email{b.anderson@@soton.ac.uk} -#' @export -#' -process1mGridSpyFiles <- function(dt){ - # outPath <- "~/Data/NZGreenGrid/gridspy/consolidated/" - hhIDs <- unique(dt$hhID) # list of household ids - for(hh in hhIDs){ - print(paste0("Loading: ", hh)) - tempHhF <- data.frame() # create tbl to hold file contents - filesToLoad <- dt[hhID == hh, .(fullPath)] - for(f in filesToLoad){ - # check file - print(paste0("Checking: ", f)) - fsize <- file.size(f) - if(fsize > 3000){ - print(paste0("File size = ", file.size(f), " so probably OK")) # files under 3kb are probably empty - # attempt to load the file - tempF <- read_csv(f, progress = FALSE, col_types = cols()) # can import .gz, requires readr, use the NULL col_types to suppress feedback - tempHhF <- rbind(tempHhF, tempF) - } - } - # tidy column names - tempHhF$r_dateTime <- tempHhF$"date NZ" - tempHhF$"date NZ" <- NULL #to avoid confusion - # set month - tempHhF$month <- month(tempHhF$r_dateTime) # requires lubridate - tempHhF$year <- year(tempHhF$r_dateTime) # requires lubridate - # save out by year & month - months <- unique(tempHhF$month) - years <- unique(tempHhF$year) - for(m in months){ - for(y in years){ - ofile <- paste0(outPath, hh,"_", y, "_", m, "_all_1min_data.csv") - write_csv(tempHhF[month == m & year == y], ofile) - cmd <- paste0("gzip -f ", ofile) # gzip it - try(system(cmd)) # in case it fails - if it does there will just be .csv files (not gzipped) - e.g. under windows - } - } - } -}