Skip to content
Snippets Groups Projects
Commit 254fe07f authored by Ben Anderson
Browse files

moved main processing code to here as it should really only be run once (or a...

moved main processing code to here as it should really only be run once (or a very few times) a month - no need for function.
parent 5cf49759
No related branches found
No related tags found
No related merge requests found
library(greenGridr)
#### About ----
# Code to process NZ GREEN Grid grid spy data
#### Libraries ----
library(data.table)
library(lubridate) # keep here otherwise data.table masks various functions
library(readr)
library(greenGridr)
#### Local parameters ----
# Root folder of the raw Grid Spy data. Keep the trailing "/": the file
# listing code builds full paths with paste0(fpath, hhID, "/", fileName), so a
# value without it would glue the folder name onto the household id.
# (A second, earlier assignment of fpath without the trailing slash was
# removed - it was overwritten here before ever being used.)
fpath <- "/Volumes/hum-csafe/Research Projects/GREEN Grid/_RAW DATA/GridSpyData/" # location of data
#fpath <- "~/Data/NZGreenGrid/gridspy/1min_orig/" # location of data
# Passed to list1mGridSpyFiles() to select files.
# NOTE(review): presumably interpreted as a regular expression by that helper -
# confirm; if so the leading "*" and the unescaped "." are looser than intended.
pattern <- "*at1.csv$" # filters only 1 min data
# Folder that the per-household, per-month CSV files are written to.
outPath <- "/Volumes/hum-csafe/Research Projects/GREEN Grid/Clean_data/gridSpy/1min/" # place to save them
#outPath <- "~/Data/NZGreenGrid/gridspy/consolidated/"
dataThreshold <- 3000 # assume any files smaller than this (bytes) = no data
### Code ----
# Get the full 1 minute file listing ----
# list1mGridSpyFiles() is supplied by greenGridr. The steps below rely on its
# result being a data.table with a column V1 holding "<hhID>/<fileName>".
filesDT <- list1mGridSpyFiles(fpath, pattern)

# Split V1 into household id + file name and, chained in the same expression,
# build the full path of every file from those two new columns.
filesDT <- filesDT[
  , c("hhID", "fileName") := tstrsplit(V1, "/")
][
  , fullPath := paste0(fpath, hhID, "/", fileName)
]

# Report how many files / households were found
print(paste0("Found ", nrow(filesDT), " files from ", filesDT[, uniqueN(hhID)], " households."))

# check
head(filesDT)

# save the files out
# NOTE(review): this re-assignment replaces the outPath chosen in the
# "Local parameters" section, so everything written after this point goes to
# the local folder instead - confirm that is intended.
outPath <- "~/Data/NZGreenGrid/gridspy/consolidated/" # place to save them
# NOTE(review): process1mGridSpyFiles() is not defined in this script; it
# looks like it may do the same work as the household loop further down -
# confirm whether both should run.
process1mGridSpyFiles(filesDT)
# Load, process & save the ones which probably have data ----
# For every household in filesDT:
#   1. stat all of its files and keep the file.info() results,
#   2. load each file bigger than dataThreshold and stack them,
#   3. rename "date NZ" to r_dateTime and drop duplicated rows,
#   4. write one CSV per (year, month) that actually occurs in the data.
# Uses filesDT, dataThreshold and outPath as set earlier in the script.
hhIDs <- unique(filesDT$hhID) # list of household ids
# One file.info table per household; bound together once after the loop
# rather than growing a table with rbind() on every file.
fileInfoList <- vector("list", length(hhIDs))

for (i in seq_along(hhIDs)) {
  hh <- hhIDs[[i]]
  print(paste0("Loading: ", hh))
  filesToLoad <- filesDT[hhID == hh, fullPath]
  rfs <- path.expand(filesToLoad) # just in case of ~ etc

  # file.info() is vectorised: stat all of this household's files in one call
  # and reuse its size column instead of calling file.size() again per file.
  finfo <- file.info(rfs)
  fileInfoList[[i]] <- as.data.table(finfo)

  # size is NA when a file cannot be stat'ed (e.g. it has disappeared);
  # treat that as "no data" rather than letting if (NA) stop the whole run.
  hasData <- !is.na(finfo$size) & finfo$size > dataThreshold # threshold set above
  pathsToRead <- rfs[hasData]
  labelsToRead <- filesToLoad[hasData]
  sizesToRead <- finfo$size[hasData]

  # attempt to load the files, collecting them in a list
  hhTables <- vector("list", length(pathsToRead))
  for (j in seq_along(pathsToRead)) {
    print(paste0("Checking: ", labelsToRead[j]))
    print(paste0("File size = ", sizesToRead[j], " so probably OK")) # files under 3kb are probably empty
    hhTables[[j]] <- fread(pathsToRead[j])
  }
  # fill = TRUE just in case there are different numbers of columns (quite likely!)
  tempHhDT <- rbindlist(hhTables, fill = TRUE)

  # Nothing loaded (all files below the threshold) or no timestamp column:
  # there is nothing to date-stamp or save, so move on to the next household.
  if (nrow(tempHhDT) == 0L || !("date NZ" %in% names(tempHhDT))) {
    print(paste0("No usable data for ", hh, " - skipping"))
    next
  }

  # tidy column names: copy "date NZ" to r_dateTime (added as the last column)
  # and then drop the original to avoid confusion
  tempHhDT[, r_dateTime := `date NZ`]
  tempHhDT[, `date NZ` := NULL]

  # remove duplicates caused by over-lapping files
  nObs <- nrow(tempHhDT)
  print(paste0("N rows before removal of dublicates: ", nObs))
  tempHhDT <- unique(tempHhDT)
  nObs <- nrow(tempHhDT)
  print(paste0("N rows after removal of dublicates: ", nObs))

  # set month & year - namespaced so the result does not depend on whether
  # data.table or lubridate was attached last
  tempHhDT[, month := lubridate::month(r_dateTime)]
  tempHhDT[, year := lubridate::year(r_dateTime)]

  # > save out by year & month ----
  # Only the (year, month) pairs that really occur are written. Looping over
  # every month x every year separately would also create empty files for
  # combinations with no data (e.g. data running Nov 2014 - Feb 2015).
  # Rows whose timestamp gives an NA year/month match no pair and are not
  # written, exactly as a month == m & year == y filter would drop them.
  yearMonths <- unique(tempHhDT[!is.na(year) & !is.na(month), .(year, month)])
  for (k in seq_len(nrow(yearMonths))) {
    y <- yearMonths$year[k]
    m <- yearMonths$month[k]
    ofile <- paste0(outPath, hh, "_", y, "_", m, "_all_1min_data.csv")
    write_csv(tempHhDT[month == m & year == y], ofile)
    print(paste0("Saved ", ofile))
    #cmd <- paste0("gzip -f ", ofile) # gzip it
    #try(system(cmd)) # in case it fails - if it does there will just be .csv files (not gzipped) - e.g. under windows
  }
}

# Combine the per-household file.info tables and summarise them
allFileInfoDT <- rbindlist(fileInfoList)
summary(allFileInfoDT)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment