Commit c069e1bb authored by Ben Anderson

copied repo from github
"","","0.132 %","99.868 %"
"(Intercept)",-4906.32488134282,-5422.91360960268,-4389.73615308295
"boiler_dYes",-634.889497893768,-1503.94951655018,234.170520762645
"li_dYes",-1109.3289459234,-2033.7908068022,-184.867085044585
"","","0.125 %","99.875 %"
"(Intercept)",5538.75243436946,4804.4779868595,6273.02688187943
"gcons2005",0.442664307205753,0.409730228512676,0.47559838589883
"boiler_yearf2004",-360.855137128462,-3955.10883762456,3233.39856336763
"boiler_yearf2005",124.974971997595,-1931.96625604494,2181.91620004013
"boiler_yearf2006",290.808073500481,-1342.05433147932,1923.67047848028
"boiler_yearf2007",31.8514283316985,-1696.83830226688,1760.54115893027
"boiler_yearf2008",144.352202786964,-2010.30897619654,2299.01338177047
"boiler_yearf2009",-1520.1708072938,-3185.35313965138,145.011525063788
"boiler_yearf2010",-623.77410932744,-2194.40708440701,946.858865752132
"boiler_yearf2011",-1360.38444166192,-2822.57779194651,101.80890862267
"boiler_yearf2012",-235.845253457229,-1657.48492716929,1185.79442025483
"li_yearf2004",-666.408751821457,-2749.23685109009,1416.41934744717
"li_yearf2005",-984.94799477955,-4065.92765581412,2096.03166625502
"li_yearf2006",-1156.04975264878,-3352.31154367204,1040.21203837449
"li_yearf2007",-202.668491097184,-2520.11503254561,2114.77805035124
"li_yearf2008",-884.31504047424,-2927.03321418134,1158.40313323286
"li_yearf2009",-823.665346539929,-2855.34036508939,1208.00967200953
"li_yearf2010",-1726.32081273708,-3588.85184657084,136.210221096674
"li_yearf2011",-639.655103228841,-2352.2758647832,1072.96565832552
"li_yearf2012",-175.584036758165,-1445.86464558848,1094.69657207215
"","","0.125 %","99.875 %"
"(Intercept)",5574.77075319441,4836.50252955806,6313.03897683076
"gcons2005",0.442452987965157,0.409427673943213,0.475478301987102
"boiler_dYes",-495.016972954767,-1167.17180055764,177.137854648102
"li_dYes",-717.53012100222,-1432.85649482572,-2.20374717872357
"","LogOdds","0.2 %","99.8 %"
"(Intercept)",-1.67659279153163,-2.61205583711484,-0.779126893409085
"prop_age1930-1949",-0.139019060965494,-0.575873217200453,0.289082045802502
"prop_age1950-1966",0.150735674604357,-0.231413674447833,0.53184389168637
"prop_age1967-1982",0.486294644764,0.110437474714348,0.862661148265705
"prop_age1983-1995",0.888075421149092,0.415115151474894,1.3605541781508
"prop_age1996 onwards",0.722149727685277,0.207381532032632,1.22791111605648
"prop_typeSemi-detached",1.05759990719242,0.431846544916061,1.74949556802709
"prop_typeEnd terrace",1.10112099816427,0.408698053170598,1.84354791553054
"prop_typeMid terrace",1.70841962077556,1.07596210468448,2.40713825676627
"prop_typeBungalow",1.00849883068166,0.309166380742514,1.75792103339591
"prop_typeFlat (incl. maisonette)",2.46039112564896,1.69544335571267,3.27658332680545
"floor_area_band51-100 m2",-0.925365084337144,-1.45070529723602,-0.40540448473751
"floor_area_band101-150 m2",-2.05911005369613,-2.68916986701439,-1.43946446161723
"floor_area_band> 151 m2",-2.52232846748057,-3.7812739295941,-1.46865910885518
"loft_depth> 150 mm",0.135416262377144,-0.206016249022113,0.48753396923013
"loft_depthUnknown",-0.0764716988919605,-0.518856157566769,0.362629561555158
"","OddsRatio","0.2 %","99.8 %"
"(Intercept)",0.187010074072783,0.0733835240079649,0.458806423390788
"prop_age1930-1949",0.870211441229914,0.562213719707864,1.33520127169451
"prop_age1950-1966",1.16268928915454,0.793411184205491,1.7020678456851
"prop_age1967-1982",1.62627910024682,1.11676652072341,2.36945778972432
"prop_age1983-1995",2.43044755923666,1.51454513275693,3.89835308539153
"prop_age1996 onwards",2.05885443310386,1.2304519390651,3.41409044380796
"prop_typeSemi-detached",2.87945173776098,1.54009876104207,5.75170060243544
"prop_typeEnd terrace",3.00753557682287,1.50485726499845,6.31891752330237
"prop_typeMid terrace",5.52023052269914,2.93281321693106,11.102144154836
"prop_typeBungalow",2.74148249508408,1.3622890103057,5.80036608344525
"prop_typeFlat (incl. maisonette)",11.7093904874039,5.44906130220354,26.4851269113644
"floor_area_band51-100 m2",0.396386678087644,0.234404904647053,0.666707083472192
"floor_area_band101-150 m2",0.127567447571272,0.0679373129741845,0.237054676572506
"floor_area_band> 151 m2",0.0802724771188942,0.0227936354346421,0.230233997027952
"loft_depth> 150 mm",1.1450133111271,0.813819852398706,1.62829583716208
"loft_depthUnknown",0.926379131473718,0.595200974876787,1.43710340227662
# Week 10 lecture - data mining (basic)
# code by @dataknut
# with substantial help from http://www.statmethods.net/
# Structure in variables:
# Factor analysis
# Structure in cases:
# Multidimensional scaling
# Extracting groups:
# Cluster analysis
# Housekeeping
# clear the workspace
rm(list=ls())
# http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/mtcars.html
# A data frame with 32 observations on 11 variables.
# [, 1] mpg Miles/(US) gallon
# [, 2] cyl Number of cylinders
# [, 3] disp Displacement (cu.in.)
# [, 4] hp Gross horsepower
# [, 5] drat Rear axle ratio
# [, 6] wt Weight (1000 lbs)
# [, 7] qsec 1/4 mile time
# [, 8] vs V/S
# [, 9] am Transmission (0 = automatic, 1 = manual)
# [,10] gear Number of forward gears
# [,11] carb Number of carburetors
# load data ----
# don't actually need to do this but it reminds us what we're doing
mtcars <- mtcars
# libraries required
library(nFactors)
library(psych)
library(corrgram)
library(scatterplot3d)
#library(rgl) # for spinnable scatters: plot3d(wt, disp, mpg, col="red", size=3)
# requires OpenGL which is not on OSX 10.11
# Factor Analysis: (Cars) ----
# check correlations
cor(mtcars)
# draw corrgram to visualise
corrgram(mtcars, order=TRUE, lower.panel=panel.shade,
upper.panel=panel.pie, text.panel=panel.txt,
main="Cars Data in PC2/PC1 Order")
# suppose we assumed there was just one factor
fitCars1 <- factanal(mtcars, 1, rotation="varimax")
print(fitCars1, digits=2, cutoff=.3, sort=TRUE)
# suppose we assumed there were just two factors
fitCars2 <- factanal(mtcars, 2, rotation="varimax")
print(fitCars2, digits=2, cutoff=.3, sort=TRUE)
# plot factor 1 by factor 2
load <- fitCars2$loadings[,1:2]
plot(load,type="n") # set up plot
text(load,labels=names(mtcars),cex=.7) # add variable names
# Determine Number of Factors to Extract
# ref http://www.statmethods.net/advstats/factor.html
ev <- eigen(cor(mtcars)) # get eigenvalues
ap <- parallel(subject=nrow(mtcars),var=ncol(mtcars),
rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)
# for some reason this plot turns all subsequent plots green!
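# (hedged suggestion: plotnScree may leave par() settings changed; saving
# the defaults first with op <- par(no.readonly = TRUE) and restoring them
# afterwards with par(op) should undo any such side effect)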
# where does the scree plot level off?
# Suggestion is to extract 3 factors,
# with varimax rotation
fitCars3factor <- factanal(mtcars, 3, rotation="varimax")
print(fitCars3factor, digits=2, cutoff=.3, sort=TRUE)
# plot factor 1 by factor 2
load2 <- fitCars3factor$loadings[,1:2]
plot(load2,type="n", col=1) # set up plot
text(load2,labels=names(mtcars),cex=.7) # add variable names
# plot all 3
load3 <- fitCars3factor$loadings[,1:3]
pairs(load3, upper.panel = NULL)
# retry with spinnable scatter once downloaded rgl
# plot3d(load3, col="red", size=3)
# Classical MDS: (Cars) ----
# See http://www.statmethods.net/advstats/mds.html
# plot simple scatter to illustrate
plot(mtcars$mpg, mtcars$hp)
text(mtcars$mpg, mtcars$hp+10,labels=row.names(mtcars), cex=.5) # add car (row) names
# plot 3D scatter to illustrate problem
s3d <- scatterplot3d(mtcars$mpg, mtcars$hp, mtcars$wt, highlight.3d = TRUE, col.axis = "blue",
              col.grid = "lightblue", main = "Cars", pch = 20)
# use the returned xyz.convert() to project the 3D points for labelling
text(s3d$xyz.convert(mtcars$mpg, mtcars$hp, mtcars$wt), labels = row.names(mtcars), cex=.7)
# retry with spinnable scatter once downloaded rgl
# plot3d(mtcars$mpg, mtcars$hp, mtcars$wt, col="red", size=3)
# N rows (objects) x p columns (variables)
# each row identified by a unique row name
# scale first for comparability?
mtcars_s <- scale(mtcars)
d <- dist(mtcars_s) # euclidean distances between the rows
mtcarsMDSfit2 <- cmdscale(d,eig=TRUE, k=2) # k is the number of dim
mtcarsMDSfit2 # view results
write.csv(mtcarsMDSfit2$points, file = "results/mtcarsMDSfit2_points.csv")
# plot solution
# if we had specified 3 dimensions we would need a 3D scatter using
# https://cran.r-project.org/web/packages/scatterplot3d/index.html
x <- mtcarsMDSfit2$points[,1]
y <- mtcarsMDSfit2$points[,2]
plot(x, y, xlab="Dimension 1", ylab="Dimension 2",
main="Metric MDS on mtcars (2 dimensions)", type="p")
text(x, y+0.5, labels = row.names(mtcars), cex=.6)
mtcarsMDSfit3 <- cmdscale(d,eig=TRUE, k=3) # k is the number of dim
mtcarsMDSfit3
write.csv(mtcarsMDSfit3$points, file = "results/mtcarsMDSfit3_points.csv")
x <- mtcarsMDSfit3$points[,1]
y <- mtcarsMDSfit3$points[,3]
plot(x, y, xlab="Dimension 1", ylab="Dimension 3",
main="Metric MDS on mtcars (3 dimensions)", type="p")
text(x, y+0.1, labels = row.names(mtcars), cex=.6)
# 3D plot
x <- mtcarsMDSfit3$points[,1]
y <- mtcarsMDSfit3$points[,2]
z <- mtcarsMDSfit3$points[,3]
s3d2 <- scatterplot3d(x, y, z, highlight.3d = TRUE, col.axis = "blue",
               col.grid = "lightblue", main = "Cars", pch = 20)
text(s3d2$xyz.convert(x, y, z), labels = row.names(mtcars), cex=.7)
# Cluster analysis (Cars) ----
# ref http://www.statmethods.net/advstats/cluster.html
# Determine number of clusters
# re-use the scaled mtcars_s
mtcars_wss <- (nrow(mtcars_s)-1)*sum(apply(mtcars_s,2,var))
for (i in 2:15) mtcars_wss[i] <- sum(kmeans(mtcars_s,
centers=i)$withinss)
plot(1:15, mtcars_wss, type="b", xlab="Number of Clusters",
ylab="Within groups sum of squares",
main = "Cars: n clusters vs within group sum of squares")
# K-Means Cluster Analysis
kmeansmtcars_s <- kmeans(mtcars_s, 5) # 5 cluster solution
# get cluster means
write.csv(aggregate(mtcars_s,by=list(kmeansmtcars_s$cluster),FUN=mean),
file = "results/kmeansmtcars_s_cluster_m.csv")
# append cluster assignment
mtcars_s <- data.frame(mtcars_s, kmeansmtcars_s$cluster)
write.csv(mtcars_s,
file = "results/kmeansmtcars_s_table.csv")
table(mtcars_s$kmeansmtcars_s.cluster)
# see also SensoMineR package
# Optional Lab - clustering households by electricity consumption
### Housekeeping ------------------------
# clear the workspace
rm(list=ls())
#libraries
library(data.table)
# may need to install these
library(nFactors)
library(psych)
library(corrgram)
library(scatterplot3d)
# just to check where R thinks we are
getwd()
# data
ifile <- "http://www.soton.ac.uk/~ba1e12/CER_wHourlyMeanByIDOct09j.csv"
# Load pre-processed data ----
# This contains mean kWh electricity consumption per hour per household ID in October 2009
# from a trial of smart meters
# Merged to a few household survey attributes
meanKwhDT <- as.data.table(read.csv(ifile))
dim(meanKwhDT)
names(meanKwhDT)
# Start cluster analysis here ----
# ref http://www.statmethods.net/advstats/cluster.html
# Create scaled data table of just the hourly values
# DT[,.(V2,V3)]
subsetDT <- subset(meanKwhDT,select = c(1:26))
subsetDT_s <- scale(subsetDT)
# Step 1 - calculate sum of squares to determine number of clusters
# how do we do this? Check Tuesday lecture code/notes
# ?? (your code here - one possible answer is sketched below)
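# a minimal sketch, reusing the within-group sum of squares 'elbow' code
# from the Tuesday lecture section above:
subsetDT_s_wss <- (nrow(subsetDT_s)-1)*sum(apply(subsetDT_s,2,var))
for (i in 2:15) subsetDT_s_wss[i] <- sum(kmeans(subsetDT_s,
                                                centers=i)$withinss)
plot(1:15, subsetDT_s_wss, type="b", xlab="Number of Clusters",
     ylab="Within groups sum of squares",
     main = "Elec: n clusters vs within group sum of squares")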
# Step 2 K-Means Cluster Analysis
# XX?? cluster solution?
kmeanssubsetDT_s <- kmeans(subsetDT_s, ????) # ???? cluster solution?
# get cluster means - probably best to write out as csv for visualisation?
# how do we do this ?? - check Tuesday lecture notes & code
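# a minimal sketch, following the aggregate() + write.csv() pattern from the
# lecture script above (the output path is a placeholder - edit for your machine):
# write.csv(aggregate(subsetDT_s, by=list(kmeanssubsetDT_s$cluster), FUN=mean),
#           file = "results/cer_cluster_means.csv")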
# Step 3 append cluster assignment
# to scaled data
subsetDT_s <- data.frame(subsetDT_s, kmeanssubsetDT_s$cluster)
# to original data
meanKwhDT <- data.frame(meanKwhDT, kmeanssubsetDT_s$cluster)
# Step 4 try a logit model to predict cluster membership based on household attributes
# you will need to recode the clusters to binary variables first!
# refer to weeks 8-9 for how to do this!!
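# a minimal sketch, assuming the kmeans step above has been completed; the
# predictor shown (HEATTYPE) is hypothetical - substitute real attribute
# names from names(meanKwhDT):
# meanKwhDT$in_cluster1 <- ifelse(meanKwhDT$kmeanssubsetDT_s.cluster == 1, 1, 0)
# logit1 <- glm(in_cluster1 ~ HEATTYPE, data = meanKwhDT, family = binomial)
# summary(logit1)
# exp(coef(logit1)) # odds ratios, as in the weeks 8-9 labs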
# End
# Optional Lab - clustering households by electricity consumption
### Housekeeping ------------------------
# clear the workspace
rm(list=ls())
#libraries
library(data.table)
# just to check where R thinks we are
getwd()
# data path
dpath <- "~/OneDrive - University of Southampton/PG/Southampton/FEEG6025 Data Analysis & Experimental Methods for Engineers/Coursework 2 - Group Assignment/Q4 DNO Trial Data Package"
# results path
rpath <- "~/OneDrive - University of Southampton/PG/Southampton/FEEG6025 Data Analysis & Experimental Methods for Engineers/FEEG6025_github/results"
# Load & process data ----
preTsurvey_coreDT <- fread(paste0(dpath,
"/Pre-trial-survey-residential-sample-v1_core_variables.csv"))
preTsurvey_fullDT <- fread(paste0(dpath,
"/Pre-trial-survey-residential-sample-v1_all_variables.csv"))
system.time(
elec_oct_DT <- fread(paste0(dpath, "/halfhour_kWh_100pc_sample.zip Folder/halfhour-electricity-all_residential_100pc_wf_Oct2009_Oct_2010_v1.csv"))
)
# sampling - draw a roughly 20% random sample of the households for testing purposes
sample_preTsurvey_coreDT <- preTsurvey_coreDT[sample(1:nrow(preTsurvey_coreDT),
800, replace = FALSE)]
# Process dates
# fix dates using data.table's update-by-reference (:=) - this actually decreases the size of the table.
size1 <- object.size(elec_oct_DT)
size1
system.time(elec_oct_DT[, s_datetime:= as.POSIXct(elec_oct_DT$s_datetime,
tz="",
"%d%b%Y %H:%M:%S")]
)
size2 <- object.size(elec_oct_DT)
size2
size1 - size2 #proof!
# extract useful time elements
system.time(elec_oct_DT$r_year <- as.POSIXlt(elec_oct_DT$s_datetime)$year) # since 1900
system.time(elec_oct_DT$r_mday <- as.POSIXlt(elec_oct_DT$s_datetime)$mday)
system.time(elec_oct_DT$r_wday <- as.POSIXlt(elec_oct_DT$s_datetime)$wday) # Sunday = 0
system.time(elec_oct_DT$r_hour <- as.POSIXlt(elec_oct_DT$s_datetime)$hour)
# check first 4 rows
elec_oct_DT[1:4,.(s_datetime, r_year, r_mday, r_wday, r_hour)]
# Merge test using sample
setkey(elec_oct_DT, ID)
setkey(sample_preTsurvey_coreDT, ID)
# using data.table built in join
system.time(
sample_joinedOctDT <- elec_oct_DT[sample_preTsurvey_coreDT]
)
# the data.table join is faster here, although it may not be faster at scale?
# Merge complete dataset using join
setkey(preTsurvey_coreDT, ID)
system.time(
joinedOctDT <- elec_oct_DT[preTsurvey_coreDT]
)
# Create hourly mean consumption per household ID for October 2009 only
hourlyMeanByIDOct09 <- joinedOctDT[r_year == 109, # POSIXlt years count from 1900, so 109 = 2009
.(
Mean_kWh = mean(kwh, na.rm = TRUE)
),
by = .(ID, r_hour
)
]
# Turn it into wide form
wHourlyMeanByIDOct09 <- reshape(hourlyMeanByIDOct09, direction="wide", idvar="ID", timevar = "r_hour")
# Merge wide data to core survey data
setkey(wHourlyMeanByIDOct09, ID)
wHourlyMeanByIDOct09j <- wHourlyMeanByIDOct09[preTsurvey_coreDT]
print(paste0("Writing data to: ", dpath))
write.csv(wHourlyMeanByIDOct09j,
file = (
paste0(
dpath,
"/CER_wHourlyMeanByIDOct09j.csv"
)
)
)
# Optional Lab - clustering households by electricity consumption
### Housekeeping ------------------------
# clear the workspace
rm(list=ls())
#libraries
library(data.table)
# may need to install these
library(nFactors)
library(psych)
library(corrgram)
library(scatterplot3d)
# just to check where R thinks we are
getwd()
# data
ifile <- "http://www.soton.ac.uk/~ba1e12/CER_wHourlyMeanByIDOct09j.csv"
# Load pre-processed data ----
# This contains mean kWh electricity consumption per hour per household ID in October 2009
# from a trial of smart meters
# Merged to a few household survey attributes
meanKwhDT <- as.data.table(read.csv(ifile))
dim(meanKwhDT)
names(meanKwhDT)
# Start cluster analysis here ----
# ref http://www.statmethods.net/advstats/cluster.html
# Create scaled data table of just the hourly values
# DT[,.(V2,V3)]
subsetDT <- subset(meanKwhDT,select = c(1:26))
subsetDT_s <- scale(subsetDT)
# Step 1 - calculate sum of squares to determine number of clusters
subsetDT_s_wss <- (nrow(subsetDT_s)-1)*sum(apply(subsetDT_s,2,var))
for (i in 2:15) subsetDT_s_wss[i] <- sum(kmeans(subsetDT_s,
centers=i)$withinss)
plot(1:15, subsetDT_s_wss, type="b", xlab="Number of Clusters",
ylab="Within groups sum of squares",
main = "Elec: n clusters vs within group sum of squares")
# K-Means Cluster Analysis
kmeanssubsetDT_s <- kmeans(subsetDT_s, 10) # 10 cluster solution?
# get cluster means - probably best to write out as csv for visualisation?
aggregate(subsetDT_s,by=list(kmeanssubsetDT_s$cluster),FUN=mean)
# append cluster assignment
# to scaled data
subsetDT_s <- data.frame(subsetDT_s, kmeanssubsetDT_s$cluster)
# to original data
meanKwhDT <- data.frame(meanKwhDT, cluster = kmeanssubsetDT_s$cluster)
# results path edit for your location
rpath <- "~/OneDrive - University of Southampton/PG/Southampton/FEEG6025 Data Analysis & Experimental Methods for Engineers/FEEG6025_github/results"
write.csv(aggregate(subsetDT_s,by=list(kmeanssubsetDT_s$cluster),FUN=mean),
file = paste0(rpath,"/cer_clusterHourlyMeans_scaled.csv")
)
write.csv(aggregate(meanKwhDT,by=list(kmeanssubsetDT_s$cluster),FUN=mean),
file = paste0(rpath,"/cer_clusterHourlyMeans_raw.csv")
)
# try a logit model to predict cluster membership based on household attributes
# you will need to recode the clusters to binary variables first!
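# a minimal sketch, assuming you run it after the kmeans step above; the
# predictor shown (HEATTYPE) is hypothetical - substitute real attribute
# names from names(meanKwhDT):
# meanKwhDT$in_cluster1 <- ifelse(meanKwhDT$cluster == 1, 1, 0)
# logit1 <- glm(in_cluster1 ~ HEATTYPE, data = meanKwhDT, family = binomial)
# summary(logit1)
# exp(coef(logit1)) # odds ratios give the effect on the odds of membership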
# End
# code by Stephanie S.Gauthier@soton.ac.uk, @Stepha_Gauthier
# use it to analyse the heart monitor data collected in the lab
# Open your csv file
HR <- read.csv("WRITE THE PATH OF YOUR FILE HERE!")
######################################
# Review your file
nrow(HR) #number of rows
head(HR) #first 6 rows of your data frame
tail(HR) #last 6 rows of your data frame
######################################
# Review one of the variables of your file
max(HR$HRbpm) #maximum value of the variable HRbpm
min(HR$HRbpm) #minimum value of the variable HRbpm
max(HR$HRbpm)-min(HR$HRbpm) #range of the variable HRbpm
mean(HR$HRbpm) #mean of the variable HRbpm
median(HR$HRbpm) #median of the variable HRbpm
as.numeric(names(table(round(HR$HRbpm,0)))[which.max(table(round(HR$HRbpm,0)))]) #mode of the variable HRbpm
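# (the line above works by tabulating the rounded values, picking the most
# frequent one with which.max(), and converting its name back to a number)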
sd(HR$HRbpm) #standard deviation of the variable HRbpm (assuming that the variable HRbpm is a sample of the population)
######################################
# Plot one of the variables of your file
# Point plot over time (use type="l" if you want a line graph)
plot(HR$HRbpm, type="p", col=2, main="Heart Rate", ylab="Heart Rate [bpm]", xlab="Time [s]", ylim=c(50,200))
# Histogram
hist(HR$HRbpm, breaks=(max(HR$HRbpm)-min(HR$HRbpm))/2, main="Heart Rate", xlab="Heart Rate [bpm]")
abline(v=mean(HR$HRbpm), col=2, lwd=2, lty=1)
abline(v=mean(HR$HRbpm)+sd(HR$HRbpm), col=2, lwd=1, lty=2)
abline(v=mean(HR$HRbpm)-sd(HR$HRbpm), col=2, lwd=1, lty=2)
# Script to load & process simple survey dataset
# Survey: https://www.isurvey.soton.ac.uk/admin/section_list.php?surveyID=17669
# Survey: https://www.isurvey.soton.ac.uk/17669 (initial student background)
# clear the workspace
rm(list=ls())
# change working directory
setwd("~/OneDriveBusiness/PG/Southampton/FEEG6025 Data Analysis & Experimental Methods for Engineers/")
# input file name
file <- "SurveyID17669_23_30"
# load the data as downloaded from isurvey into a data frame
# it's in week 2
classSurveyDF <- read.csv(paste0("Week 2/", file, ".csv"))
# create a fictional age variable, sampling with replacement (so values can repeat)
n <- length(classSurveyDF$Participant.ID)
classSurveyDF$age <- sample(seq(21, 35), size = n, replace = TRUE)
# check
hist(classSurveyDF$age, xlab = "Age", main = "Histogram of Age")
# convert the previous course data to be non-disclosive
classSurveyDF$previous_full <- classSurveyDF$What.was.your.first.degree...e.g..MEng.Civil.Engineering........
classSurveyDF$previous_safe <- "Other" # non-disclosive default
classSurveyDF$previous_safe[grep("ngine",classSurveyDF$previous_full)] <- "Engineer"
classSurveyDF$previous_safe[grep("ivil",classSurveyDF$previous_full)] <- "Civil Engineer"
# set to NA if we don't know
classSurveyDF$previous_safe[classSurveyDF$Date.Finished == "Did not finish"] <- NA
table(classSurveyDF$previous_safe)
# make safe
classSurveyDF$previous_full <- NULL
classSurveyDF$What.was.your.first.degree...e.g..MEng.Civil.Engineering........ <- NULL
safeClassSurveyDF <- classSurveyDF[,-grep("X.|Previous.ID|.Order|YOUR|WHAT|FUTURE", colnames(classSurveyDF))]
# save the safe version to the Data folder
write.csv(safeClassSurveyDF, file = paste0("Data/", file, "_initial_wf.csv"))
# Meta ----
# Script to load & process simple survey dataset
# Survey admin: https://www.isurvey.soton.ac.uk/admin/section_list.php?surveyID=17669
# Survey: https://www.isurvey.soton.ac.uk/17669
# code by: b.anderson@soton.ac.uk (@dataknut) with help from lsb1@soton.ac.uk
### Housekeeping ------------------------
# clear the workspace
rm(list=ls())
# where is the default working directory?
getwd()
# set location of data
# you will need to change this to wherever you put the data (.csv file) you just downloaded
# use the results of getwd() above to see the format to use for the PC you are using
dpath <- "~/UoS One Drive/PG/Southampton/FEEG6025 Data Analysis & Experimental Methods for Engineers/Data"
# change the working directory/folder so R can find the data easily
setwd(dpath)
# input file name
# make sure this is the name of the data file (without the .csv suffix)
file <- "SurveyID17669_23_30_initial_wf"
### Functions ----
### Luke's finish time conversion
convertiSurveyFinishTime <- function(iSurveyFinishTime) {
# lapply applies function(x) to every list item; this deals with character(0) results etc.
as.POSIXct(unlist(