Skip to content
Snippets Groups Projects
Commit 71249a40 authored by Ben Anderson's avatar Ben Anderson
Browse files

created as R package

parent f6887c38
No related branches found
No related tags found
No related merge requests found
^saveData\.Rproj$
^\.Rproj\.user$
Package: saveData
Title: Package to support the re-use of the SAVE household demand response trial data
Version: 0.1
Authors@R: c(person("Ben", "Anderson", email = "b.anderson@soton.ac.uk", role = c("aut", "cre")),
person("Michael", "Harper", email = "m.harper@soton.ac.uk", role = "aut"),
person("Tom", "Rushby", email = "twrushby@soton.ac.uk", role = "aut"))
Description: A range of scripts for data cleaning and processing.
This work was funded by the Low Carbon Network Fund (LCNF) Tier 2 Programme project "Solent Achieving Value from Efficiency (SAVE)" http://www.energy.soton.ac.uk/tag/save/.
License: file LICENCE
Encoding: UTF-8
LazyData: true
# Generated by roxygen2: fake comment so roxygen2 overwrites silently.
exportPattern("^[^\\.]")
This diff is collapsed.
This diff is collapsed.
#' Tidy long numbers
#'
#' \code{tidyNum} reformats long numbers to include commas and prevents scientific formats,
#' making them suitable for printing within R Markdown reports and inline text.
#'
#' @param number an input number or list
#'
#' @examples
#' tidyNum(123456789)
#' tidyNum(10^6)
#' tidyNum(c(10^6, 10^7, 10^8))
#'
#' @author Ben Anderson, \email{banderson@@soton.ac.uk}
#' @export
#' @family Utilities
tidyNum <- function(number) {
format(number, big.mark=",", scientific=FALSE)
}
#' Return middle 98\% of a distribution
#'
#' \code{getP98} filters top & bottom 1\% and returns the middle 98\%
#'
#' @param dt the data table
#' @param var the variable to filter
#' @return the middle 98pc as a data.table
#'
#' @author Ben Anderson, \email{banderson@@soton.ac.uk}
#' @export
#' @family Utilities
getP98 <-function(dt,var){
# expects a data.table
# returns a data.table with the middle 98% of var
# Calculate mean & sd for use in calculating bounds
attach(dt)
tempMean <- mean(get(var))
tempSD <- sd(get(var))
# p98 extract
p01 <- quantile(get(var), probs = 0.01) # central 98%
p99 <- quantile(get(var), probs = 0.99)
dt <- subset(dt, p01 < get(var) & get(var) < p99)
return(dt)
}
#' Return middle 95\% of a distribution using z scores
#'
#' \code{getZ95} filters top & bottom 2.5\% using z scores and returns the middle 95\%
#'
#' @param dt the data table
#' @param var the variable to filter
#'
#'
#' @author Ben Anderson, \email{banderson@@soton.ac.uk}
#' @export
#' @family Utilities
getZ95 <-function(dt,var){
# expects a data.table
# returns a data.table with only +/- 95% z for var
# Calculate mean & sd for use in calculating bounds
attach(dt)
tempMean <- mean(get(var))
tempSD <- sd(get(var))
# z95 extract
z95Lower <- tempMean - (qnorm(0.975) * tempSD) # +/- 2.5%
z95Upper <- tempMean + (qnorm(0.975) * tempSD)
dt <- subset(dt, z95Lower < get(var) & get(var) < z95Upper)
return(dt)
}
#' Prints information about the R session
#'
#' \code{DisplaySystemInfo} provides a summary printout of the
#' R session, including the operating system, user info and project location.
#'
#' Before running this, the user should have defined the \code{projLoc} variable
#' using the \code{SAVEr::findParentDirectory}
#'
#' @param printMessage TRUE/FALSE. States whether the parameter should be printed.
#'
#' @author Mikey Harper, \email{m.harper@@soton.ac.uk}
#' @seealso \code{\link{findParentDirectory}}
#' @seealso \code{\link{assignUserSystemInfo}}
#' @export
#' @family Utilities
displaySystemInfo <- function(printMessage = TRUE){
if(is.na(sysName)) assignUserSystemInfo()
if(is.na(projLoc)) stop("Please specify `projLoc` in the Global Environment")
if(isTRUE(printMessage)){
cat(paste0("Running on ", sysName, " with projLoc = ", projLoc, "\n \n",
" User: ",Sys.info()[[7]]), "\n",
"Platform: ",Sys.info()[[4]], "\n",
"Input data path: ",dPath, "\n")
}
}
#' Installs and loads packages
#'
#' \code{myRequiredPackages} checks whether the package is already installed,
#' installing those which are not preinstalled. All the libraries are then load.
#'
#' Especially useful when running on virtual machines where package installation
#' is not persistent (Like UoS sve). It will fail if the packages need to be
#' installed but there is no internet access
#' @param ... A list of packages
#' @param repository The repository to load functions from. Defaults to "https://cran.ma.imperial.ac.uk/"
#' @importFrom utils install.packages
#'
#' @author Luke Blunden, \email{lsb@@soton.ac.uk} (original)
#' @author Michael Harper \email{m.harper@@soton.ac.uk} (revised version)
#' @export
#' @family Utilities
myRequiredPackages <- function(..., repository = "https://cran.rstudio.com/"){
packages <- c(...)
# Find if package isn't installed
newPackages <- packages[!(packages %in% utils::installed.packages()[,1])]
# Install if required
if (length(newPackages)){utils::install.packages(newPackages, dependencies = TRUE)}
# Load packages
sapply(packages, require, character.only = TRUE)
}
#' Installs and loads packages from Github
#'
#' \code{myRequiredPackagesGithub} checks whether the package is already installed.
#' If the package is not detected, it will attempt to install it from GitHub,
#' before loading the packages.
#'
#'
#' @param ... A list of packages
#' @importFrom utils install.packages
#'
#' @author Michael Harper \email{m.harper@@soton.ac.uk}
#' @export
#' @family Utilities
myRequiredPackagesGithub <- function(..., quiet = FALSE){
packages <- c(...)
# Only keep package name
packageName <- gsub("^.*/", "", packages)
# Find if package isn't installed
newPackages <- packages[!(packageName %in% utils::installed.packages()[,1])]
# Install if required
if (length(newPackages)){devtools::install_github(newPackages)}
# Load packages
sapply(packageName, require, character.only = TRUE)
}
#' Compresses any file to .gz file
#'
#' The function will check and delete any existing compressed file
#'
#' @param file path to file
#' @param output file
#' @author Ben Anderson
#'
#' @family Utilities
gzipIt <- function(file){
# Check if input exists
if(!file.exists(file)) stop("Input `file` not located.")
# Path of output file
zipped <- paste0(file, ".gz")
# Rremove old gzip file if exists
if(file.exists(zipped)){
message("Deleting old File...")
file.remove(zipped)
message("DONE\n")
}
# Gzip new one
# in case it fails (it will on windows - you will be left with the file)
message(paste0("gzipping file to: ", zipped))
try(system( paste0("gzip ", file)))
message("Gzipped file created.")
}
#' Extract a compressed .7z file
#'
#' The function will unpack a compressed file
#'
#' @param inFile name of archive file
#' @param fileDir directory containing file
#'
#' @author Tom Rushby, \email{(t.w.rushby@@soton.ac.uk)}
#' @export
#'
#' @family Utilities
#' @examples
#' extract7z(fileDir = "/SotonGitLab/SERG/SAVE", inFile = "LED_installs.7z")
#' extract7z(fileDir = outFileDir, inFile = inFile)
extract7z <- function(fileDir, inFile){
filePath <- paste0(fileDir,inFile)
# Check if input file exists
if(!file.exists(filePath)) stop("Input `file` does not exist.")
archive::archive(filePath)
# create output file directory
print(paste0("Extracting file to location: ",fileDir,"\n"))
archive::archive_extract(filePath, fileDir, file = NULL)
}
#' Find user information
#'
#' \code{UserSystemInfo} lists the user system information. No parameters have to
#' be provided
#'
#' @return The system name, username and computer.
#' @author Mikey Harper, \email{m.harper@@soton.ac.uk}
#' @export
#' @family Utilities
assignUserSystemInfo <- function(){
assign("sysInfo", Sys.info(), envir = globalenv())
assign("sysName", sysInfo[[1]], envir = globalenv())
assign("nodeName", sysInfo[[4]], envir = globalenv())
assign("userName", sysInfo[[7]], envir = globalenv())
rm(sysInfo, envir = globalenv())
}
#' Converts timestamps between local times
#'
#' Overrides the timestamp of a timestamp, converting the clock time to a different
#' local time.
#'
#' @description
#' All data collected within the SAVE project is collected in UTC time. For the analysis
#' of behaviour patterns, it is important that the local time is used
#'
#' @param timestamp a POSIXct, POSIXlt, Date, chron date-time object or a data.frame object.
#' @param from,to valid timezone values
#' @export
#' @author Michael Harper
#'
#' @family Utilities
convertTimezone <- function(timestamp, from = "UTC", to = "Europe/London", message = TRUE, warning = TRUE){
# check valid timestamps
if(!is.POSIXct(timestamp)) stop("timestamp must be a POSIXct object")
if(!(from %in% OlsonNames())) stop("specified 'from' is not a valid timezone")
if(!(to %in% OlsonNames())) stop("specified 'to' is not a valid timezone")
# Check the input timestamp
inputTimezone <- attr(timestamp, "tzone")
if(from != inputTimezone){
# Prints warning if specified
if(warning) warning("Input timezone does not match the specified object.",
"Timezone of object: ", inputTimezone, "\n",
"Timezone specified: ", from, "\n")
if(message) message("Timezone overwritten by function without conversion of the clock time")
timestamp <- lubridate::force_tz(time = timestamp, tzone = from)
}
# Convert ID
timestamp2 <- lubridate::with_tz(time = timestamp, tzone = to)
return(timestamp2)
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment