Skip to content
Snippets Groups Projects
Commit 5cf49759 authored by Ben Anderson's avatar Ben Anderson
Browse files

removed the data processing function as the data.table parts seem to break. Now in R script

parent 5bacefec
Branches
No related tags found
No related merge requests found
......@@ -11,67 +11,9 @@
#' @export
#'
list1mGridSpyFiles <- function(fpath, pattern){
  # Recursively lists the files under fpath whose names match pattern and
  # returns a data.table with one row per file and two columns:
  #   hhID     - the first directory component of the path relative to fpath
  #   fullPath - fpath joined with the relative path
  # Example arguments:
  # fpath <- "/Volumes/hum-csafe/Research Projects/GREEN Grid/_RAW DATA/GridSpyData"
  # pattern <- "*at1.csv$"
  print(paste0("Looking for files matching ", pattern, " in ", fpath))
  relPaths <- list.files(path = fpath, pattern = pattern, # use the pattern to filter e.g. 1m from 30s files
                         recursive = TRUE)
  # Name the column explicitly so the code below does not depend on the
  # default column name chosen by as.data.table()
  dt <- as.data.table(list(V1 = relPaths))
  print("First 6 rows of list:")
  print(head(dt))
  print("Processing file list")

  # The household id is the top-level folder. Taking the first path component
  # (rather than splitting into exactly two parts) also works for files nested
  # more than one level deep and for an empty file list.
  hhFolder <- sub("/.*$", "", relPaths)
  # Files sitting directly in fpath have no household folder
  hhFolder[!grepl("/", relPaths, fixed = TRUE)] <- NA_character_

  dt <- dt[, hhID := hhFolder]
  dt <- dt[, fullPath := file.path(fpath, V1)] # get full path

  print(paste0("Found ", tidyNum(nrow(dt)), " files from ",
               uniqueN(dt$hhID, na.rm = TRUE), " households."))
  dt[, .(hhID, fullPath)]
}
#' Load all available 1 minute grid spy data files from the list returned by list1mGridSpyFiles
#'
#' \code{process1mGridSpyFiles} loads, processes and saves 1 minute grid spy files by iterating over each household id. We cannot just concatenate all the files since the
#' column headings (circuit labels) vary. The function saves out 1 data file per household ID per month as a gzipped .csv file.
#' @param dt a data table with 2 columns: hhID and fullPath derived from list1mGridSpyFiles()
#' @param outPath directory to write the consolidated files to. If \code{NULL} (the default) a character variable called
#'   \code{outPath} visible from the calling environment is used; an error is raised if there is none.
#' @return \code{NULL}, invisibly. Called for its side effect of writing files.
#'
#' @author Ben Anderson, \email{b.anderson@@soton.ac.uk}
#' @export
#'
process1mGridSpyFiles <- function(dt, outPath = NULL){
  # outPath <- "~/Data/NZGreenGrid/gridspy/consolidated/"
  if (is.null(outPath)) {
    # Backward compatibility: earlier versions read a variable named outPath
    # that had to exist outside the function.
    outPath <- get0("outPath", envir = parent.frame(), mode = "character",
                    inherits = TRUE)
    if (is.null(outPath)) {
      stop("outPath is not set: pass the output directory as the outPath argument",
           call. = FALSE)
    }
  }
  if (!dir.exists(outPath)) {
    # Fail (or create) before any data is loaded rather than at write time
    dir.create(outPath, recursive = TRUE, showWarnings = FALSE)
  }

  hhIDs <- unique(dt$hhID) # list of household ids
  for (hh in hhIDs) {
    print(paste0("Loading: ", hh))
    # Plain character vector of paths, so the loop below visits one file at a
    # time (looping over a one-column table would visit the column as a whole)
    filesToLoad <- dt$fullPath[dt$hhID %in% hh]

    loaded <- list() # one element per successfully loaded file
    for (f in filesToLoad) {
      # check file
      print(paste0("Checking: ", f))
      fsize <- file.size(f) # NA if the file does not exist
      if (!is.na(fsize) && fsize > 3000) {
        print(paste0("File size = ", fsize, " so probably OK")) # files under 3kb are probably empty
        # attempt to load the file
        # can import .gz, requires readr, use the empty cols() spec to suppress feedback
        loaded[[length(loaded) + 1L]] <- readr::read_csv(f, progress = FALSE,
                                                         col_types = readr::cols())
      }
    }

    if (length(loaded) == 0L) {
      print(paste0("No usable files found for ", hh, " - skipping"))
      next
    }
    # Combine once, after the loop, instead of growing a data frame per file
    hhData <- do.call(rbind, loaded)

    # tidy column names
    if (!("date NZ" %in% names(hhData))) {
      stop("Expected a 'date NZ' column in the files for household ", hh,
           call. = FALSE)
    }
    hhData$r_dateTime <- hhData[["date NZ"]]
    hhData[["date NZ"]] <- NULL # to avoid confusion

    # set month & year (requires lubridate)
    # NOTE(review): assumes read_csv parsed 'date NZ' as a date-time - confirm
    hhData$month <- lubridate::month(hhData$r_dateTime)
    hhData$year <- lubridate::year(hhData$r_dateTime)

    # save out by year & month: only the combinations that actually occur,
    # so no empty files are written for year/month pairs with no data
    periods <- unique(hhData[, c("year", "month")])
    periods <- periods[stats::complete.cases(periods), ]
    for (i in seq_len(nrow(periods))) {
      y <- periods$year[i]
      m <- periods$month[i]
      # Base-R row selection works whatever data frame class read_csv returned
      rows <- which(hhData$year == y & hhData$month == m)
      # The .gz extension makes readr compress on write, so no external gzip
      # command is needed and this also works under Windows
      ofile <- file.path(outPath,
                         paste0(hh, "_", y, "_", m, "_all_1min_data.csv.gz"))
      readr::write_csv(hhData[rows, ], ofile)
    }
  }
  invisible(NULL)
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment