diff --git a/greenGridr/R/processGridSpyFiles.R b/greenGridr/R/processGridSpyFiles.R index 0d25c5ac20ef77b3ef69c2494327105b10ec3cba..06756c8c5ce4bc275bc2b0276bd2513747401a44 100644 --- a/greenGridr/R/processGridSpyFiles.R +++ b/greenGridr/R/processGridSpyFiles.R @@ -1,8 +1,9 @@ -#' Functions for processing grid spy data files +#' List all available 1 minute grid spy data files from a given path #' #' \code{list1mGridSpyFiles}. Lists all available 1 minute grid spy files from data repository. The functions, like most others in this package will fail -#' if you do not have the data in a folder/directory corresponding to 'path'. Code borrows extensively from similar SAVE project function. -#' This could take quite a long time if you have a lot of files. +#' if you do not have the data in a folder/directory corresponding to 'fpath'. Code borrows extensively from similar SAVE project function. +#' This could take quite a long time if you have a lot of files. The function returns a data table with 2 columns: hhID (household id) and fullPath (full path to each file). +#' We need the hhID as we cannot just concatenate all the files since the column headings (circuit labels) vary. #' @param fpath the name of the directory where the function should look #' @param pattern a pattern to match. Use the pattern to filter e.g. 1m (xx_at1.csv) from 30s () files #' @@ -10,17 +11,30 @@ #' @export #' list1mGridSpyFiles <- function(fpath, pattern){ - # /Volumes/hum-csafe/Research Projects/GREEN Grid/_RAW DATA/GridSpyData - # "*at1.csv$" + # fpath <- "/Volumes/hum-csafe/Research Projects/GREEN Grid/_RAW DATA/GridSpyData" + # pattern <- "*at1.csv$" print(paste0("Looking for files matching ", pattern, " in ", fpath)) - fileListDT <- as.data.table(list.files(path = fpath, pattern = pattern, # use the pattern to filter e.g. 
1m from 30s files - recursive = TRUE)) - fileListDT <- fileListDT[, - c("hhID","fileName") := tstrsplit(V1, "/") # get actual household id & filename - ] - fileListDT <- fileListDT[, - fullPath := paste0(fpath,"/",V1) # get actual file name - ] - print(paste0("Found ", tidyNum(nrow(fileListDT)), " of them.")) - return(fileListDT) + dt <- as.data.table(list.files(path = fpath, pattern = pattern, # use the pattern to filter e.g. 1m from 30s files + recursive = TRUE)) + dt <- dt[, + c("hhID","fileName") := tstrsplit(V1, "/") # get actual household id & filename + ] + dt <- dt[, + fullPath := paste0(fpath,"/",V1) # get actual file name + ] + print(paste0("Found ", tidyNum(nrow(dt)), " files from ", uniqueN(dt$hhID), " households.")) + return(dt[, .(hhID, fullPath)]) +} + +#' Load all available 1 minute grid spy data files from the list returned by list1mGridSpyFiles +#' +#' \code{process1mGridSpyFiles}. Loads, processes and saves 1 minute grid spy files by iterating over each household id. We cannot just concatenate all the files since the +#' column headings (circuit labels) vary. The function saves out 1 data file per household ID per month. +#' @param dt a data table with 2 columns: hhID and fullPath derived from list1mGridSpyFiles() +#' +#' @author Ben Anderson, \email{b.anderson@@soton.ac.uk} +#' @export +#' +process1mGridSpyFiles <- function(dt){ + }