diff --git a/greenGridr/R/processGridSpyFiles.R b/greenGridr/R/processGridSpyFiles.R index 06756c8c5ce4bc275bc2b0276bd2513747401a44..eb3c6f5b2872652ba21be0a9ae08a994dca5d4cf 100644 --- a/greenGridr/R/processGridSpyFiles.R +++ b/greenGridr/R/processGridSpyFiles.R @@ -16,11 +16,14 @@ list1mGridSpyFiles <- function(fpath, pattern){ print(paste0("Looking for files matching ", pattern, " in ", fpath)) dt <- as.data.table(list.files(path = fpath, pattern = pattern, # use the pattern to filter e.g. 1m from 30s files recursive = TRUE)) + print("First 6 rows of list:") + print(head(dt)) + print("Processing file list") dt <- dt[, c("hhID","fileName") := tstrsplit(V1, "/") # get actual household id & filename ] dt <- dt[, - fullPath := paste0(fpath,"/",V1) # get actual file name + fullPath := paste0(fpath,"/",V1) # get full path ] print(paste0("Found ", tidyNum(nrow(dt)), " files from ", uniqueN(dt$hhID), " households.")) return(dt[, .(hhID, fullPath)]) @@ -29,12 +32,46 @@ list1mGridSpyFiles <- function(fpath, pattern){ #' Load all available 1 minute grid spy data files from the list returned by list1mGridSpyFiles #' #' \code{process1mGridSpyFiles}. Loads, processes ans saves 1 minute grid spy files by iterating over each household id. We we cannot just concatinate all the files since the -#' column headings (circuit labels) vary. The function saves out 1 data file per household ID per month. +#' column headings (circuit labels) vary. The function saves out 1 data file per household ID per month as a gzipped .csv file. 
#' @param dt a data table with 2 columns: hhID and fullPath derived from list1mGridSpyFiles()
#' @param outPath prefix for every output file. It is pasted directly in front
#'   of the household id, so a directory must end with a path separator. The
#'   default is the location that was previously only named in a comment.
#'
#' @return \code{NULL}, invisibly. The function is called for its side effect
#'   of writing one (gzipped, where \code{gzip} is available) .csv file per
#'   household per year and month.
#'
#' @author Ben Anderson, \email{b.anderson@@soton.ac.uk}
#' @export
#'
process1mGridSpyFiles <- function(dt, outPath = "~/Data/NZGreenGrid/gridspy/consolidated/") {
  minFileSize <- 3000 # bytes: files under 3kb are probably empty

  hhIDs <- unique(dt$hhID) # list of household ids
  for (hh in hhIDs) {
    print(paste0("Loading: ", hh))

    # Extract the paths as a plain vector so the loop below sees one file at a
    # time; looping over a 1-column table would hand over the whole column.
    filesToLoad <- as.character(dt$fullPath[dt$hhID == hh])

    # Collect each file's contents in a pre-sized list and bind once at the
    # end, rather than growing a data frame inside the loop.
    loaded <- vector("list", length(filesToLoad))
    for (i in seq_along(filesToLoad)) {
      f <- filesToLoad[[i]]
      print(paste0("Checking: ", f))
      fsize <- file.size(f) # NA when the file does not exist
      if (!is.na(fsize) && fsize > minFileSize) {
        print(paste0("File size = ", fsize, " so probably OK"))
        # can import .gz, requires readr; cols() suppresses the column-spec feedback
        loaded[[i]] <- read_csv(f, progress = FALSE, col_types = cols())
      }
    }
    loaded <- Filter(Negate(is.null), loaded)
    if (length(loaded) == 0) {
      print(paste0("No usable files for ", hh, " - skipping"))
      next
    }

    # requires data.table; fill = TRUE tolerates files whose columns differ
    hhData <- rbindlist(loaded, use.names = TRUE, fill = TRUE)
    if (!("date NZ" %in% names(hhData))) {
      warning("No 'date NZ' column found for ", hh, " - skipping", call. = FALSE)
      next
    }

    # tidy column names
    hhData$r_dateTime <- hhData[["date NZ"]]
    hhData[["date NZ"]] <- NULL # to avoid confusion
    # set month & year
    hhData$month <- month(hhData$r_dateTime) # requires lubridate
    hhData$year <- year(hhData$r_dateTime) # requires lubridate

    # save out by year & month, visiting only the combinations that actually
    # occur in the data so that no empty files are written
    periods <- unique(data.frame(year = hhData$year, month = hhData$month))
    periods <- periods[!is.na(periods$year) & !is.na(periods$month), , drop = FALSE]
    for (p in seq_len(nrow(periods))) {
      y <- periods$year[p]
      m <- periods$month[p]
      ofile <- path.expand(paste0(outPath, hh, "_", y, "_", m, "_all_1min_data.csv"))
      keep <- which(hhData$year == y & hhData$month == m)
      write_csv(hhData[keep, ], ofile)
      # gzip it; the path is quoted so spaces or shell metacharacters are safe
      cmd <- paste0("gzip -f ", shQuote(ofile))
      # in case it fails - if it does there will just be .csv files (not gzipped) - e.g. under windows
      try(system(cmd))
    }
  }
  invisible(NULL)
}