# Create a data.table - these are similar to data frames which are built in to R (both can be conceived as 'spreadsheets' or 'tables') but much faster & more memory efficient, so they're good for Big Data (tm)
# Before we create a data table we have to load the data.table library as it is not a core part of R ('base')
# Derive a new variable from the value of index in every row and store it as a new column.
# NB: `:=` makes data.table update its own contents, so the `<-` assignment is not
# strictly needed - we keep it for now to ease the cognitive load!
dt <- dt[, sqrtIndex := sqrt(index)]
# No explicit loop over the rows was required - sqrt() is applied to the whole
# column at once ('vectorised' computation).
# Check what it looks like - there should now be two columns.
dt
...
...
@@ -69,3 +76,28 @@ with(dt, cor.test(index, sqrtIndex)) # use with() to tell R which object (usuall
# Fit a linear model of index on sqrtIndex, taking both variables from dt.
linearModelResult <- lm(index ~ sqrtIndex, data = dt)
# We use 'summary' again, but it knows what to do with a model result object.
summary(linearModelResult)
# Now the fun part: loading data ----
# > Load an internal R dataset ----
# mtcars is available in R without reading any file; wrap it in a data.table.
mtcarsDT <- data.table(mtcars)
# Peek at the first few rows.
head(mtcarsDT)
# Create a simple (!) visualisation of all the variables:
# pairs() draws a scatterplot for every pair of columns.
pairs(mtcarsDT)
# > Load one from a local file ----
# "data/pressure.csv" is a relative path, so this will only work if the data
# folder (with the file in it) sits inside R's current working directory -
# check with getwd(); this is usually the folder the R script lives in.
# fread comes with data.table, is VERY fast & automatically creates a data.table.
pressureLocalDT <- fread("data/pressure.csv")
# Summarise every column, then plot the table.
summary(pressureLocalDT)
plot(pressureLocalDT)
# > Load one from the internet ----
# This will only work if you have internet access!
# fread accepts a URL as well as a local path and, as before, returns a data.table.
pressureNetDT <- fread(
  "https://git.soton.ac.uk/ba1e12/intro2R/raw/master/data/pressure.csv"
)
# Summarise every column of the downloaded table.
summary(pressureNetDT)
# You will notice that these were .csv files.
# R likes .csv
# But if you must, you can import from just about anything else - SAS, STATA, Matlab, SPSS, Excel (for Excel I recommend readxl: https://cran.r-project.org/web/packages/readxl/index.html)