diff --git a/myFirstRMarkdown.Rmd b/myFirstRMarkdown.Rmd new file mode 100644 index 0000000000000000000000000000000000000000..0ca6d21d383d04ea18f54d7fe354c36a4537202b --- /dev/null +++ b/myFirstRMarkdown.Rmd @@ -0,0 +1,106 @@ +--- +title: "myFirstRMarkdown" +author: "Ben Anderson (b.anderson@soton.ac.uk `@dataknut`)" +date: 'Last run at: `r Sys.time()`' +output: + html_document: + fig_caption: yes + keep_md: yes + number_sections: yes + self_contained: no + toc: yes + toc_float: yes +--- + +```{r Setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) + +# Libraries ---- +library(data.table) +library(ggplot2) +``` + +# Introduction + +This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>. + +When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. 

# Load data

You can embed an R code chunk like this:

```{r load the generation data over the net}
# Download one month of wholesale generation data from the NZ Electricity
# Authority's EMI site. The file name (201712) identifies December 2017.
generationUrl <- "https://www.emi.ea.govt.nz/Wholesale/Datasets/Generation/Generation_MD/201712_Generation_MD.csv"
generationDTorig <- fread(generationUrl)

# Reshape the data as it comes in a rather unhelpful (wide) form: every
# column that is not listed in id.vars is stacked into Time_Period / MW pairs.
generationDT <- melt(
  generationDTorig,
  id.vars = c(
    "Site_Code", "POC_Code", "Nwk_Code", "Gen_Code",
    "Fuel_Code", "Tech_Code", "Trading_date"
  ),
  variable.name = "Time_Period", # converts TP1-48
  value.name = "MW" # Megawatts
)

# Fix the dates so R knows what they are.
generationDT <- generationDT[, rDate := as.Date(Trading_date)]
```

# Summarise the data

You can also embed tables, for example:

```{r table1}
# summary() is printed as plain console output. knitr has no `caption` chunk
# option, so a captioned table needs kable() as shown in the next chunk.
summary(generationDT)
```

Or you can format the table nicely using the kable function from the knitr package:

```{r table2}
# Note that chunk names need to be unique.
# The result is not called `t`, which would mask base R's transpose function.
summaryTable <- summary(generationDT)
knitr::kable(summaryTable, caption = "Summary table")
```


# Plot the data

Use ggplot to embed a chart in the document.

```{r plot1, fig.cap="Plot generation by date and time period", echo=FALSE}
# See how we turned off printing the code into the report (echo=FALSE).

# Create an aggregated table summing generation by fuel, date and time period.
# This is what data.table is really good at.
# NOTE(review): sum(MW) is NA for any group that contains a missing value;
# confirm whether na.rm = TRUE is wanted for this data.
plotDT <- generationDT[
  ,
  .(
    totalMW = sum(MW),
    nObs = .N
  ),
  keyby = .(Time_Period, rDate, Fuel_Code)
]

# > Use the new data to draw a chart of all generation in the data ----
# The caption names the month actually loaded above (file 201712).
myCaption <- "Source: December 2017 wholesale generation data via EMI (NZ Electricity Authority)"
ggplot(
  plotDT,
  aes(x = Time_Period, y = totalMW, colour = as.factor(rDate), group = rDate)
) +
  geom_point() +
  facet_grid(Fuel_Code ~ .) +
  labs(caption = myCaption)
```

Now draw a plot of hydro as a % of all.

```{r plot2, fig.cap="Plot hydro as a % of all generation by date and time period"}
# > hydro as a % of all ----
# Total generation across all fuels for each time period on each date.
# `:=` adds the column to plotDT by reference, one value per group repeated on
# every row of that group; plain `by` is the documented grouping for `:=`.
plotDT <- plotDT[, sumAllMW := sum(totalMW), by = .(Time_Period, rDate)]

# Each fuel's share of that total, as a percentage.
plotDT <- plotDT[, pcTotalMW := 100 * (totalMW / sumAllMW)]

# > Use the new data to draw a chart of hydro as a % of all ----
# No date filter is applied, so one line is drawn for every date in the data.
# Subset on rDate first if only a single week (e.g. Christmas week) is wanted.
ggplot(
  plotDT[Fuel_Code == "Hydro"],
  aes(x = Time_Period, y = pcTotalMW, colour = as.factor(rDate), group = rDate)
) +
  geom_line() +
  labs(y = "Hydro as % of total generation", colour = "Date")
```

# Discuss your results
here

# Conclusions
go here

# References