From a015fbde70ec916066def3608b1d4452b45fb810 Mon Sep 17 00:00:00 2001
From: Ben Anderson <dataknut@icloud.com>
Date: Tue, 23 May 2017 13:38:03 +0100
Subject: [PATCH] updated function to create uniq pid & diarypid using a hash function to ensure unique ids across all years (including UK TU 2014)

---
 mtusFunctions.R | 43 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/mtusFunctions.R b/mtusFunctions.R
index 4299ab4..6c30475 100644
--- a/mtusFunctions.R
+++ b/mtusFunctions.R
@@ -17,7 +17,8 @@ lb_myRequiredPackages <- function(x,y){
 }
 
 # Use the function to load the libraries required by this code
-reqLibs <- c("data.table" # fast data munching
+reqLibs <- c("data.table", # fast data munching
+             "openssl" # for hashing ids
 )
 
 print(paste0("Loading the following libraries using lb_myRequiredPackages: ", reqLibs))
@@ -30,6 +31,42 @@ ba_tidyNum <- function(number) {
   format(number, big.mark=",", scientific=FALSE)
 }
 
+###
+ba_MTUScreateIds <- function(dt){
+  # Create a person id (ba_pid) & a diary id (ba_diarypid) that are unique
+  # across all years.
+  # dt: data.table with columns survey, swave, msamp, hldid, persid & id.
+  # Adds ba_pidChar, ba_pid & ba_diarypid to dt using := and returns dt
+  # invisibly.
+  # NB: paste() is used, not paste0(). paste0() has no sep argument, so
+  # sep = "_" was appended as one more string & the key fields ran together
+  # with no delimiter (hldid 1 + persid 12 and hldid 11 + persid 2 gave the
+  # same key & so the same hash). Ids therefore differ from earlier runs.
+  dt <- dt[, ba_pidChar := paste(survey, swave, msamp, hldid, persid, sep = "_")] # force character
+  dt <- dt[, ba_pid := md5(ba_pidChar)] # hash (md5() comes from openssl)
+  dt <- dt[, ba_diarypid := paste(ba_pid, id, sep = "_")] # force character
+  invisible(dt)
+}
+
+# # diarypid
+# mtusUKEpsDT$ba_diarypid <-
+#   group_indices(mtusUKEpsDT, survey,
+#                 swave,
+#                 msamp,
+#                 hldid,
+#                 persid,
+#                 id
+#   )
+#
+# # pid
+# mtusUKEpsDT$ba_pid <-
+#   group_indices(mtusUKEpsDT, survey,
+#                 swave,
+#                 msamp,
+#                 hldid,
+#                 persid
+#   )
+
 ###
 ba_MTUScreateEpisodeStartEndDateTimes <- function(dt){
   # Setting up corrected start and end timestamps
@@ -53,8 +90,8 @@ ba_MTUScreateAllEpisodeStartTimesAsString <- function(dt){
   # start time as string
   # Use the fake start time we created earlier which includes all cases
   dt <- dt[, st_hour := as.POSIXlt(dt$r_epStartDateTime)$hour]
-  dt <- dt[, st_mins := as.POSIXlt(mtusUKEpsDT$r_epStartDateTime)$min]
-  dt <- dt[, str_epStartTime := paste0(dt$st_hour,
+  dt <- dt[, st_mins := as.POSIXlt(dt$r_epStartDateTime)$min]
+  dt <- dt[, r_epStartTimeStr := paste0(dt$st_hour,
                                        ":",
                                        dt$st_mins
   )
-- 
GitLab