# Print out the date & time - always useful to know
Sys.time()# type the first few characters of this function in the console - what happens?
# Take some time to explore the RStudio UI ----
# > Console ----
# > Files/Packages/Help (very useful!) ----
# > Script editor ----
# > Environment ----
# Play with some numbers ----
# Do some simple maths
...
...
@@ -35,10 +43,15 @@ sqrt(result)
# Before we create a data table we have to load the data.table library as it is not a core part of R ('base')
library(data.table)
# if that fails we need to use:
# install.packages("data.table") # this will install the package from the default package archive (probably CRAN)
# OK, now we're ready
# Create a new data table with one column called 'index' and n rows defined by.... nRows
# First define the number of rows
nRows<-100
dt<-data.table(index=rep(1:nRows))
# Now create the data table which we'll call 'dt' (we could call it (almost) anything)
dt<-data.table(index=rep(1:nRows))# say hello to the really useful rep() function
# check what it looks like
dt
...
...
@@ -65,28 +78,67 @@ summary(dt)
# But we'd quite like to visualise the data - draw a simple plot
plot(dt)
# Or maybe we'd like to test a correlation
cor.test(dt$index,dt$sqrtIndex)
# Did you get bored typing dt twice?
with(dt,cor.test(index,sqrtIndex))# use with() to tell R which object (usually a data.table) you want to work with. In this case it made for more typing but if your object name is long or you need to type a lot of variables it will save on finger-wear
# Now let's try a very simple linear regression model and catch the results in a model result object
# Bonus points for telling me why a linear model is the _wrong_ choice!
linearModelResult<-lm(index~sqrtIndex,dt)
linearModelResult<-lm(index~sqrtIndex,dt)# Note the lm function forces us to specify the data to use
summary(linearModelResult)# note we use 'summary' again but it knows what to with a model result object
# Now the fun part: loading data ----
# Play with ggplot ----
# > Load an internal R dataset ----
mtcarsDT<-data.table(mtcars)
mtcarsDT<-data.table(mtcars)# Note I use 'DT' to remind me this object is a data table
# to find out which other datasets come with R run data()
head(mtcarsDT)
# Create a simple (!) visualisation of all the variables
pairs(mtcarsDT)
# > Load one from a local file ----
# So far we've used R's base plot() function.
# This is OK but ggplot is a package that creates very fancy graphs - http://ggplot2.tidyverse.org/reference/
# Load ggplot
library(ggplot2)
# if that fails we need to use:
# install.packages("ggplot2") # this will install the package from the default package archive (probably CRAN)
# > Now let's create a basic ggplot object
myPlot<-ggplot(mtcarsDT)
# test it
myPlot# what happens?
# We need to tell ggplot what the axes are
myPlot<-ggplot(mtcarsDT,aes(x=disp,y=mpg))
myPlot# what happens?
# We need to tell ggplot what to plot
myPlot<-ggplot(mtcarsDT,aes(x=disp,y=mpg))+
geom_point()# a point plot - here are lots more http://ggplot2.tidyverse.org/reference/index.html#section-layer-geoms
myPlot
# We want capitals on our axis labels and a caption. I want to use them again and again so we use variables to store them
myCaption<-paste0("R mtcars data: n obs = ",nrow(mtcarsDT))# Note the use of paste0() to construct a caption from text and R
myXlabel<-"Displacement"
myYlabel<-"Miles per gallon"
# Now add the axes labels & caption
myPlot<-myPlot+labs(x=myXlabel,y=myYlabel,caption=myCaption)# Note how we just add to myPlot
# see http://ggplot2.tidyverse.org/index.html for other things we can do
myPlot
# Let's have some colour
myPlot<-myPlot+geom_point(colour="blue")# see how we're building up layers?
myPlot
# Let's use colour to tell us something
myPlot<-myPlot+geom_point(aes(colour=gear))
myPlot
# Now we're happy we can do all that in one call and add an outlier label:
# Lots more examples at http://r4ds.had.co.nz/data-visualisation.html
ggplot(mtcarsDT,aes(x=disp,y=mpg))+
geom_point(aes(colour=gear))+
labs(x=myXlabel,y=myYlabel,caption=myCaption)+
geom_label(x=250,y=23,label="Outlier?")
# > Load a dataset from a local file ----
# This will only work if you have this folder & file in the same place as the R script
pressureLocalDT<-fread("data/pressure.csv")# fread comes with data.table, is VERY fast & automatically creates a data.table
summary(pressureLocalDT)
...
...
@@ -94,10 +146,58 @@ plot(pressureLocalDT)
# > Load one from the internet ----
# This will only work if you have internet access!
pressureNetDT<-fread("https://git.soton.ac.uk/ba1e12/intro2R/raw/master/data/pressure.csv")# fread comes with data.table, is VERY fast & automatically creates a data.table
generationDT<-generationDT[,rDate:=as.Date(Trading_date)]# fix the dates so R knows what they are
# You will notice that these were .csv files.
# R likes .csv
# But if you must you can import from just about anything else - SAS, STATA, Matlab, SPSS, Excel (I recommend https://cran.r-project.org/web/packages/readxl/index.html)