diff --git a/myFirstRScript.R b/myFirstRScript.R index c39b186e3ff07ce3b0458cc27477293309bf094b..3422f02f796a0e8b27a1f350a914ff753be64857 100644 --- a/myFirstRScript.R +++ b/myFirstRScript.R @@ -4,15 +4,19 @@ # Helpful documentation: # https://www.rdocumentation.org/ +# https://cran.r-project.org/ # https://www.statmethods.net/index.html # https://stackoverflow.com/questions/tagged/r +# http://r4ds.had.co.nz/ +# Preliminaries ---- # Print out the date - always useful to know Sys.Date() # Print out the time - also useful to know Sys.time() +# Play with some numbers ---- # Do some simple maths 4 + 4 @@ -25,6 +29,8 @@ result # Do something to the result and print the answer sqrt(result) +# Play with some data ---- + # Create a data.table - these are similar to data frames which are built in to R (both can be conceived as 'spreadsheets' or 'tables') but much faster & more memory efficient, so they're good for Big Data (tm) # Before we create a data table we have to load the data.table library as it is not a core part of R ('base') library(data.table) @@ -37,8 +43,9 @@ dt <- data.table(index = rep(1:nRows)) # check what it looks like dt -# now do something to each row of index +# now do something to the value of index in each row and save it to a new variable dt <- dt[, sqrtIndex := sqrt(index)] # NB: we didn't need to do the <- assignment because data.table will update its contents but we'll stick with assignment for now to ease the cognitive load! +# Note that we didn't need to loop over the rows - R takes care of that via 'vectorised' computation # check what it looks like - there should now be two columns. 
dt @@ -69,3 +76,28 @@ with(dt, cor.test(index, sqrtIndex)) # use with() to tell R which object (usuall linearModelResult <- lm(index ~ sqrtIndex, dt) summary(linearModelResult) # note we use 'summary' again but it knows what to do with a model result object + +# Now the fun part: loading data ---- +# > Load an internal R dataset ---- +mtcarsDT <- data.table(mtcars) + +head(mtcarsDT) + +# Create a simple (!) visualisation of all the variables +pairs(mtcarsDT) + +# > Load one from a local file ---- +# This will only work if the data folder & file are in R's working directory (e.g. the same place as the R script) +pressureLocalDT <- fread("data/pressure.csv") # fread comes with data.table, is VERY fast & automatically creates a data.table +summary(pressureLocalDT) +plot(pressureLocalDT) + +# > Load one from the internet ---- +# This will only work if you have internet access! +pressureNetDT <- fread("https://git.soton.ac.uk/ba1e12/intro2R/raw/master/data/pressure.csv") # fread comes with data.table, is VERY fast & automatically creates a data.table +summary(pressureNetDT) + +# You will notice that these were .csv files. +# R likes .csv +# But if you must, you can import from just about anything else - SAS, STATA, Matlab, SPSS, Excel (I recommend https://cran.r-project.org/web/packages/readxl/index.html) +# https://cran.r-project.org/doc/manuals/r-release/R-data.html \ No newline at end of file