diff --git a/Rmd/cleaningFeederData.Rmd b/Rmd/cleaningFeederData.Rmd index aa31ad5ec002c798e509b654822e30c77169eca0..032a46503dfc3cffc86b096f41b1198d70b97694 100644 --- a/Rmd/cleaningFeederData.Rmd +++ b/Rmd/cleaningFeederData.Rmd @@ -9,7 +9,7 @@ author: '`r params$authors`' date: 'Last run at: `r Sys.time()`' output: bookdown::html_document2: - self_contained: TRUE + self_contained: no fig_caption: yes code_folding: hide number_sections: yes @@ -305,13 +305,17 @@ aggDT[, propExpected := sumOK/(uniqueN(feederDT$feeder_ID)*24*4)] # we expect 25 summary(aggDT) message("How many days have 100%?") -nrow(aggDT[propExpected == 1]) +n <- nrow(aggDT[propExpected == 1]) +n ``` +So, there are `r n` days with 100% data... + If we plot the mean then we will see which days get closest to having a full dataset. ```{r bestDaysMean, fig.width=8} ggplot2::ggplot(aggDT, aes(x = rDate, colour = season, y = meanOK)) + geom_point() + ``` Re-plot by the % of expected if we assume we _should_ have 25 feeders * 24 hours * 4 per hour (will be the same shape): @@ -319,6 +323,27 @@ Re-plot by the % of expected if we assume we _should_ have 25 feeders * 24 hours ```{r bestDaysProp, fig.width=8} ggplot2::ggplot(aggDT, aes(x = rDate, colour = season, y = 100*propExpected)) + geom_point() + labs(y = "%") + +aggDT[, rDoW := lubridate::wday(rDate, lab = TRUE)] +h <- head(aggDT[season == "Spring"][order(-propExpected)]) +kableExtra::kable(h, caption = "Best Spring days overall", + digits = 3) %>% + kable_styling() + +h <- head(aggDT[season == "Summer"][order(-propExpected)]) +kableExtra::kable(h, caption = "Best Summer days overall", + digits = 3) %>% + kable_styling() + +h <- head(aggDT[season == "Autumn"][order(-propExpected)]) +kableExtra::kable(h, caption = "Best Autumn days overall", + digits = 3) %>% + kable_styling() + +h <- head(aggDT[season == "Winter"][order(-propExpected)]) +kableExtra::kable(h, caption = "Best Winter days overall", + digits = 3) %>% + kable_styling() ``` This also tells us that there is some reason why we get fluctations in the number of data points per hour after 2003. diff --git a/Rmd/cleaningFeederData_allData.log b/Rmd/cleaningFeederData_allData.log index 8b7564885cbe3188d26fd17986f41690f91ac17e..318599028f02c106bb96cf5612c33b613d85db70 100644 --- a/Rmd/cleaningFeederData_allData.log +++ b/Rmd/cleaningFeederData_allData.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013) (format=pdflatex 2020.4.15) 8 JUL 2020 22:59 +This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013) (format=pdflatex 2020.4.15) 9 JUL 2020 00:11 entering extended mode restricted \write18 enabled. %&-line parsing enabled. diff --git a/_drakeCleanFeeders.R b/_drakeCleanFeeders.R index 9e0fbd70f41d78079d0bc2a59b3c8e8e379ca6dd..1cfc5b2a176b0234cad9da3b1292bfcd6a141282 100644 --- a/_drakeCleanFeeders.R +++ b/_drakeCleanFeeders.R @@ -54,7 +54,7 @@ addSeason <- function(dt,dateVar,h){ } -getData <- function(f,update){ +getData <- function(f,updateData){ # gets the data dt <- data.table::fread(f) dt[, rDateTime := lubridate::as_datetime(Time)] # the dateTime is now called Time!!! @@ -120,7 +120,7 @@ saveData <- function(dt, which){ } } -makeReport <- function(f,version, type = "html"){ +makeReport <- function(f,version, type = "html", updateReport){ # default = html message("Rendering ", f, ".Rmd (version: ", version, ") to ", type) if(type == "html"){ @@ -149,14 +149,14 @@ makeReport <- function(f,version, type = "html"){ # Set the drake plan ---- my_plan <- drake::drake_plan( - origData = getData(dFile, update), # returns data as data.table. If you edit 'update' in any way it will reload - drake is watching you! + origData = getData(dFile, updateData), # returns data as data.table. If you edit 'update' in any way it will reload - drake is watching you! uniqData = makeUniq(origData), # remove duplicates wideData = toWide(uniqData), saveLong = saveData(uniqData, "L"), # doesn't actually return anything saveWide = saveData(wideData, "W"), # doesn't actually return anything # pdf output fails - #pdfOut = makeReport(rmdFile, version, "pdf"), # pdf - must be some way to do this without re-running the whole thing - htmlOut = makeReport(rmdFile, version, "html") # html output + pdfOut = makeReport(rmdFile, version, "pdf", updateReport), # pdf - must be some way to do this without re-running the whole thing + htmlOut = makeReport(rmdFile, version, "html", updateReport) # html output ) # see https://books.ropensci.org/drake/projects.html#usage diff --git a/docs/cleaningFeederData_allData.html b/docs/cleaningFeederData_allData.html index 33850f81b9338ea46e5ef51291d40c75818e2ed9..6b6c96862139d78a9278a2cf7eec1190202e5897 100644 --- a/docs/cleaningFeederData_allData.html +++ b/docs/cleaningFeederData_allData.html @@ -181,7 +181,7 @@ summary { <h1 class="title toc-ignore">Testing electricity substation/feeder data</h1> <h3 class="subtitle">Outliers and missing data...</h3> <h4 class="author">Ben Anderson & Ellis Ridett</h4> -<h4 class="date">Last run at: 2020-07-08 23:36:27</h4> +<h4 class="date">Last run at: 2020-07-09 00:01:07</h4> </div> @@ -2296,8 +2296,10 @@ summary(aggDT)</code></pre> ## Max. :77.00 Max. :6130 Max. :0.8186432 ## </code></pre> <pre class="r"><code>message("How many days have 100%?") -nrow(aggDT[propExpected == 1])</code></pre> +n <- nrow(aggDT[propExpected == 1]) +n</code></pre> <pre><code>## [1] 0</code></pre> +<p>So, there are 0 days with 100% data...</p> <p>If we plot the mean then we will see which days get closest to having a full dataset.</p> <pre class="r"><code>ggplot2::ggplot(aggDT, aes(x = rDate, colour = season, y = meanOK)) + geom_point()</code></pre> <p><img src="" width="768" /></p> @@ -2305,6 +2307,787 @@ nrow(aggDT[propExpected == 1])</code></pre> <pre class="r"><code>ggplot2::ggplot(aggDT, aes(x = rDate, colour = season, y = 100*propExpected)) + geom_point() + labs(y = "%")</code></pre> <p><img src="" width="768" /></p> +<pre class="r"><code>aggDT[, rDoW := lubridate::wday(rDate, lab = TRUE)] +h <- head(aggDT[season == "Spring"][order(-propExpected)]) +kableExtra::kable(h, caption = "Best Spring days overall", + digits = 3) %>% + kable_styling()</code></pre> +<table class="table" style="margin-left: auto; margin-right: auto;"> +<caption> +Best Spring days overall +</caption> +<thead> +<tr> +<th style="text-align:left;"> +rDate +</th> +<th style="text-align:left;"> +season +</th> +<th style="text-align:right;"> +meanOK +</th> +<th style="text-align:right;"> +minOk +</th> +<th style="text-align:right;"> +maxOk +</th> +<th style="text-align:right;"> +sumOK +</th> +<th style="text-align:right;"> +propExpected +</th> +<th style="text-align:left;"> +rDoW +</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align:left;"> +2002-04-14 +</td> +<td style="text-align:left;"> +Spring +</td> +<td style="text-align:right;"> +63.490 +</td> +<td style="text-align:right;"> +60 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6095 +</td> +<td style="text-align:right;"> +0.814 +</td> +<td style="text-align:left;"> +Sun +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-03-20 +</td> +<td style="text-align:left;"> +Spring +</td> +<td style="text-align:right;"> +63.458 +</td> +<td style="text-align:right;"> +61 +</td> +<td style="text-align:right;"> +67 +</td> +<td style="text-align:right;"> +6092 +</td> +<td style="text-align:right;"> +0.814 +</td> +<td style="text-align:left;"> +Thu +</td> +</tr> +<tr> +<td style="text-align:left;"> +2002-03-21 +</td> +<td style="text-align:left;"> +Spring +</td> +<td style="text-align:right;"> +63.385 +</td> +<td style="text-align:right;"> +62 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6085 +</td> +<td style="text-align:right;"> +0.813 +</td> +<td style="text-align:left;"> +Thu +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-03-14 +</td> +<td style="text-align:left;"> +Spring +</td> +<td style="text-align:right;"> +63.385 +</td> +<td style="text-align:right;"> +61 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6085 +</td> +<td style="text-align:right;"> +0.813 +</td> +<td style="text-align:left;"> +Fri +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-03-16 +</td> +<td style="text-align:left;"> +Spring +</td> +<td style="text-align:right;"> +63.375 +</td> +<td style="text-align:right;"> +60 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6084 +</td> +<td style="text-align:right;"> +0.812 +</td> +<td style="text-align:left;"> +Sun +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-03-17 +</td> +<td style="text-align:left;"> +Spring +</td> +<td style="text-align:right;"> +63.375 +</td> +<td style="text-align:right;"> +61 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6084 +</td> +<td style="text-align:right;"> +0.812 +</td> +<td style="text-align:left;"> +Mon +</td> +</tr> +</tbody> +</table> +<pre class="r"><code>h <- head(aggDT[season == "Summer"][order(-propExpected)]) +kableExtra::kable(h, caption = "Best Summer days overall", + digits = 3) %>% + kable_styling()</code></pre> +<table class="table" style="margin-left: auto; margin-right: auto;"> +<caption> +Best Summer days overall +</caption> +<thead> +<tr> +<th style="text-align:left;"> +rDate +</th> +<th style="text-align:left;"> +season +</th> +<th style="text-align:right;"> +meanOK +</th> +<th style="text-align:right;"> +minOk +</th> +<th style="text-align:right;"> +maxOk +</th> +<th style="text-align:right;"> +sumOK +</th> +<th style="text-align:right;"> +propExpected +</th> +<th style="text-align:left;"> +rDoW +</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align:left;"> +2003-08-22 +</td> +<td style="text-align:left;"> +Summer +</td> +<td style="text-align:right;"> +63.854 +</td> +<td style="text-align:right;"> +56 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6130 +</td> +<td style="text-align:right;"> +0.819 +</td> +<td style="text-align:left;"> +Fri +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-08-30 +</td> +<td style="text-align:left;"> +Summer +</td> +<td style="text-align:right;"> +63.844 +</td> +<td style="text-align:right;"> +60 +</td> +<td style="text-align:right;"> +66 +</td> +<td style="text-align:right;"> +6129 +</td> +<td style="text-align:right;"> +0.819 +</td> +<td style="text-align:left;"> +Sat +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-08-31 +</td> +<td style="text-align:left;"> +Summer +</td> +<td style="text-align:right;"> +63.812 +</td> +<td style="text-align:right;"> +59 +</td> +<td style="text-align:right;"> +66 +</td> +<td style="text-align:right;"> +6126 +</td> +<td style="text-align:right;"> +0.818 +</td> +<td style="text-align:left;"> +Sun +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-08-23 +</td> +<td style="text-align:left;"> +Summer +</td> +<td style="text-align:right;"> +63.677 +</td> +<td style="text-align:right;"> +56 +</td> +<td style="text-align:right;"> +66 +</td> +<td style="text-align:right;"> +6113 +</td> +<td style="text-align:right;"> +0.816 +</td> +<td style="text-align:left;"> +Sat +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-08-25 +</td> +<td style="text-align:left;"> +Summer +</td> +<td style="text-align:right;"> +63.677 +</td> +<td style="text-align:right;"> +55 +</td> +<td style="text-align:right;"> +66 +</td> +<td style="text-align:right;"> +6113 +</td> +<td style="text-align:right;"> +0.816 +</td> +<td style="text-align:left;"> +Mon +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-08-26 +</td> +<td style="text-align:left;"> +Summer +</td> +<td style="text-align:right;"> +63.656 +</td> +<td style="text-align:right;"> +58 +</td> +<td style="text-align:right;"> +66 +</td> +<td style="text-align:right;"> +6111 +</td> +<td style="text-align:right;"> +0.816 +</td> +<td style="text-align:left;"> +Tue +</td> +</tr> +</tbody> +</table> +<pre class="r"><code>h <- head(aggDT[season == "Autumn"][order(-propExpected)]) +kableExtra::kable(h, caption = "Best Autumn days overall", + digits = 3) %>% + kable_styling()</code></pre> +<table class="table" style="margin-left: auto; margin-right: auto;"> +<caption> +Best Autumn days overall +</caption> +<thead> +<tr> +<th style="text-align:left;"> +rDate +</th> +<th style="text-align:left;"> +season +</th> +<th style="text-align:right;"> +meanOK +</th> +<th style="text-align:right;"> +minOk +</th> +<th style="text-align:right;"> +maxOk +</th> +<th style="text-align:right;"> +sumOK +</th> +<th style="text-align:right;"> +propExpected +</th> +<th style="text-align:left;"> +rDoW +</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align:left;"> +2003-09-02 +</td> +<td style="text-align:left;"> +Autumn +</td> +<td style="text-align:right;"> +63.823 +</td> +<td style="text-align:right;"> +57 +</td> +<td style="text-align:right;"> +66 +</td> +<td style="text-align:right;"> +6127 +</td> +<td style="text-align:right;"> +0.818 +</td> +<td style="text-align:left;"> +Tue +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-09-01 +</td> +<td style="text-align:left;"> +Autumn +</td> +<td style="text-align:right;"> +63.771 +</td> +<td style="text-align:right;"> +56 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6122 +</td> +<td style="text-align:right;"> +0.818 +</td> +<td style="text-align:left;"> +Mon +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-09-07 +</td> +<td style="text-align:left;"> +Autumn +</td> +<td style="text-align:right;"> +63.740 +</td> +<td style="text-align:right;"> +57 +</td> +<td style="text-align:right;"> +66 +</td> +<td style="text-align:right;"> +6119 +</td> +<td style="text-align:right;"> +0.817 +</td> +<td style="text-align:left;"> +Sun +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-09-03 +</td> +<td style="text-align:left;"> +Autumn +</td> +<td style="text-align:right;"> +63.667 +</td> +<td style="text-align:right;"> +57 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6112 +</td> +<td style="text-align:right;"> +0.816 +</td> +<td style="text-align:left;"> +Wed +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-09-04 +</td> +<td style="text-align:left;"> +Autumn +</td> +<td style="text-align:right;"> +63.615 +</td> +<td style="text-align:right;"> +57 +</td> +<td style="text-align:right;"> +66 +</td> +<td style="text-align:right;"> +6107 +</td> +<td style="text-align:right;"> +0.816 +</td> +<td style="text-align:left;"> +Thu +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-09-06 +</td> +<td style="text-align:left;"> +Autumn +</td> +<td style="text-align:right;"> +63.552 +</td> +<td style="text-align:right;"> +57 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6101 +</td> +<td style="text-align:right;"> +0.815 +</td> +<td style="text-align:left;"> +Sat +</td> +</tr> +</tbody> +</table> +<pre class="r"><code>h <- head(aggDT[season == "Winter"][order(-propExpected)]) +kableExtra::kable(h, caption = "Best Winter days overall", + digits = 3) %>% + kable_styling()</code></pre> +<table class="table" style="margin-left: auto; margin-right: auto;"> +<caption> +Best Winter days overall +</caption> +<thead> +<tr> +<th style="text-align:left;"> +rDate +</th> +<th style="text-align:left;"> +season +</th> +<th style="text-align:right;"> +meanOK +</th> +<th style="text-align:right;"> +minOk +</th> +<th style="text-align:right;"> +maxOk +</th> +<th style="text-align:right;"> +sumOK +</th> +<th style="text-align:right;"> +propExpected +</th> +<th style="text-align:left;"> +rDoW +</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align:left;"> +2002-02-28 +</td> +<td style="text-align:left;"> +Winter +</td> +<td style="text-align:right;"> +63.292 +</td> +<td style="text-align:right;"> +60 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6076 +</td> +<td style="text-align:right;"> +0.811 +</td> +<td style="text-align:left;"> +Thu +</td> +</tr> +<tr> +<td style="text-align:left;"> +2002-02-25 +</td> +<td style="text-align:left;"> +Winter +</td> +<td style="text-align:right;"> +63.125 +</td> +<td style="text-align:right;"> +61 +</td> +<td style="text-align:right;"> +65 +</td> +<td style="text-align:right;"> +6060 +</td> +<td style="text-align:right;"> +0.809 +</td> +<td style="text-align:left;"> +Mon +</td> +</tr> +<tr> +<td style="text-align:left;"> +2002-12-11 +</td> +<td style="text-align:left;"> +Winter +</td> +<td style="text-align:right;"> +62.979 +</td> +<td style="text-align:right;"> +61 +</td> +<td style="text-align:right;"> +64 +</td> +<td style="text-align:right;"> +6046 +</td> +<td style="text-align:right;"> +0.807 +</td> +<td style="text-align:left;"> +Wed +</td> +</tr> +<tr> +<td style="text-align:left;"> +2002-12-01 +</td> +<td style="text-align:left;"> +Winter +</td> +<td style="text-align:right;"> +62.917 +</td> +<td style="text-align:right;"> +61 +</td> +<td style="text-align:right;"> +64 +</td> +<td style="text-align:right;"> +6040 +</td> +<td style="text-align:right;"> +0.807 +</td> +<td style="text-align:left;"> +Sun +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-01-01 +</td> +<td style="text-align:left;"> +Winter +</td> +<td style="text-align:right;"> +62.906 +</td> +<td style="text-align:right;"> +60 +</td> +<td style="text-align:right;"> +64 +</td> +<td style="text-align:right;"> +6039 +</td> +<td style="text-align:right;"> +0.806 +</td> +<td style="text-align:left;"> +Wed +</td> +</tr> +<tr> +<td style="text-align:left;"> +2003-01-03 +</td> +<td style="text-align:left;"> +Winter +</td> +<td style="text-align:right;"> +62.906 +</td> +<td style="text-align:right;"> +60 +</td> +<td style="text-align:right;"> +64 +</td> +<td style="text-align:right;"> +6039 +</td> +<td style="text-align:right;"> +0.806 +</td> +<td style="text-align:left;"> +Fri +</td> +</tr> +</tbody> +</table> <p>This also tells us that there is some reason why we get fluctations in the number of data points per hour after 2003.</p> </div> <div id="summary" class="section level1"> @@ -2313,7 +3096,7 @@ nrow(aggDT[propExpected == 1])</code></pre> </div> <div id="runtime" class="section level1"> <h1>Runtime</h1> -<p>Analysis completed in 218.01 seconds ( 3.63 minutes) using <a href="https://cran.r-project.org/package=knitr">knitr</a> in <a href="http://www.rstudio.com">RStudio</a> with R version 3.6.0 (2019-04-26) running on x86_64-redhat-linux-gnu.</p> +<p>Analysis completed in 221.84 seconds ( 3.7 minutes) using <a href="https://cran.r-project.org/package=knitr">knitr</a> in <a href="http://www.rstudio.com">RStudio</a> with R version 3.6.0 (2019-04-26) running on x86_64-redhat-linux-gnu.</p> </div> <div id="r-environment" class="section level1"> <h1>R environment</h1> @@ -2360,18 +3143,19 @@ nrow(aggDT[propExpected == 1])</code></pre> ## [1] storr_1.2.1 progress_1.2.2 tidyselect_1.1.0 xfun_0.14 ## [5] repr_1.1.0 purrr_0.3.4 colorspace_1.4-0 vctrs_0.3.1 ## [9] generics_0.0.2 viridisLite_0.3.0 htmltools_0.3.6 yaml_2.2.0 -## [13] base64enc_0.1-3 rlang_0.4.6 pillar_1.4.4 txtq_0.2.0 -## [17] glue_1.4.1 withr_2.1.2 lifecycle_0.2.0 stringr_1.4.0 -## [21] munsell_0.5.0 gtable_0.2.0 rvest_0.3.5 evaluate_0.14 -## [25] labeling_0.3 knitr_1.28 parallel_3.6.0 fansi_0.4.0 -## [29] highr_0.7 Rcpp_1.0.1 readr_1.3.1 scales_1.0.0 -## [33] backports_1.1.3 filelock_1.0.2 webshot_0.5.2 jsonlite_1.6 -## [37] digest_0.6.25 stringi_1.2.4 dplyr_1.0.0 grid_3.6.0 -## [41] rprojroot_1.3-2 cli_2.0.2 tools_3.6.0 magrittr_1.5 -## [45] base64url_1.4 tibble_3.0.1 crayon_1.3.4 pkgconfig_2.0.2 -## [49] ellipsis_0.3.1 xml2_1.3.2 prettyunits_1.0.2 httr_1.4.1 -## [53] assertthat_0.2.0 rmarkdown_2.2 rstudioapi_0.11 R6_2.3.0 -## [57] igraph_1.2.2 compiler_3.6.0</code></pre> +## [13] base64enc_0.1-3 rlang_0.4.6 R.oo_1.22.0 pillar_1.4.4 +## [17] txtq_0.2.0 glue_1.4.1 withr_2.1.2 R.utils_2.7.0 +## [21] lifecycle_0.2.0 stringr_1.4.0 munsell_0.5.0 gtable_0.2.0 +## [25] rvest_0.3.5 R.methodsS3_1.7.1 codetools_0.2-16 evaluate_0.14 +## [29] labeling_0.3 knitr_1.28 parallel_3.6.0 fansi_0.4.0 +## [33] highr_0.7 Rcpp_1.0.1 readr_1.3.1 scales_1.0.0 +## [37] backports_1.1.3 filelock_1.0.2 webshot_0.5.2 jsonlite_1.6 +## [41] digest_0.6.25 stringi_1.2.4 dplyr_1.0.0 grid_3.6.0 +## [45] rprojroot_1.3-2 cli_2.0.2 tools_3.6.0 magrittr_1.5 +## [49] base64url_1.4 tibble_3.0.1 crayon_1.3.4 pkgconfig_2.0.2 +## [53] ellipsis_0.3.1 xml2_1.3.2 prettyunits_1.0.2 httr_1.4.1 +## [57] assertthat_0.2.0 rmarkdown_2.2 rstudioapi_0.11 R6_2.3.0 +## [61] igraph_1.2.2 compiler_3.6.0</code></pre> </div> </div> <div id="the-raw-data-cleaning-code" class="section level1"> diff --git a/docs/cleaningFeederData_allData.tex b/docs/cleaningFeederData_allData.tex index 6db441e85ad06170a8b9aff0165dec4d94a824df..8528c853b0f3d1118c085a7e51b0fa13f91dc66d 100644 --- a/docs/cleaningFeederData_allData.tex +++ b/docs/cleaningFeederData_allData.tex @@ -110,12 +110,26 @@ \apptocmd{\@title}{\par {\large #1 \par}}{}{} } \makeatother +\usepackage{booktabs} +\usepackage{longtable} +\usepackage{array} +\usepackage{multirow} +\usepackage{wrapfig} +\usepackage{float} +\usepackage{colortbl} +\usepackage{pdflscape} +\usepackage{tabu} +\usepackage{threeparttable} +\usepackage{threeparttablex} +\usepackage[normalem]{ulem} +\usepackage{makecell} +\usepackage{xcolor} \title{Testing electricity substation/feeder data} \providecommand{\subtitle}[1]{} \subtitle{Outliers and missing data\ldots{}} \author{Ben Anderson \& Ellis Ridett} -\date{Last run at: 2020-07-08 22:56:02} +\date{Last run at: 2020-07-09 00:07:52} \begin{document} \maketitle @@ -139,1041 +153,267 @@ \end{Shaded} \begin{verbatim} -## kableExtra -## TRUE -\end{verbatim} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{# Parameters ----} -\CommentTok{#dFile <- "~/Dropbox/Ben_IOW_SS.csv" # edit for your set up} - - -\CommentTok{# Functions ----} -\CommentTok{# put more general ones that could be useful to everyone in /R so they are built into the package.} - -\CommentTok{# put functions relevant to this analysis here} -\end{Highlighting} -\end{Shaded} - -\section{Intro}\label{intro} - -We have some electricity substation feeder data that has been cleaned to -give mean kW per 15 minutes. - -There seem to be some NA kW values and a lot of missing time stamps. We -want to select the `best' (i.e most complete) days within a -day-of-the-week/season/year sampling frame. If we can't do that we may -have to resort to seasonal mean kW profiles by hour \& day of the -week\ldots{} - -Code used to generate this report: -\url{https://git.soton.ac.uk/ba1e12/spatialec/-/blob/master/isleOfWight/cleaningFeederData.Rmd} - -\section{Data prep}\label{data-prep} - -\subsection{Load data}\label{load-data} - -Loaded data from -/mnt/SERG\_data/Ellis\_IOW/Cleaned\_SS\_Amps/amps\_all\_substations.csv.gz\ldots{} -(using drake) - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{origDataDT <-}\StringTok{ }\NormalTok{drake}\OperatorTok{::}\KeywordTok{readd}\NormalTok{(origData) }\CommentTok{# readd the drake object} -\KeywordTok{head}\NormalTok{(origDataDT)} -\end{Highlighting} -\end{Shaded} - -\begin{verbatim} -## Time region sub_region rDateTime rTime -## 1: 2003-01-13T10:30:00Z ARRN ARRN 2003-01-13 10:30:00 10:30:00 -## 2: 2003-01-13T10:45:00Z ARRN ARRN 2003-01-13 10:45:00 10:45:00 -## 3: 2003-01-13T11:15:00Z ARRN ARRN 2003-01-13 11:15:00 11:15:00 -## 4: 2003-01-13T11:30:00Z ARRN ARRN 2003-01-13 11:30:00 11:30:00 -## 5: 2003-01-13T11:45:00Z ARRN ARRN 2003-01-13 11:45:00 11:45:00 -## 6: 2003-01-13T12:15:00Z ARRN ARRN 2003-01-13 12:15:00 12:15:00 -## rDate rYear rDoW kW feeder_ID season -## 1: 2003-01-13 2003 Mon 2.0000000 ARRN_ARRN Winter -## 2: 2003-01-13 2003 Mon 18.2500000 ARRN_ARRN Winter -## 3: 2003-01-13 2003 Mon 0.6666667 ARRN_ARRN Winter -## 4: 2003-01-13 2003 Mon 28.5000000 ARRN_ARRN Winter -## 5: 2003-01-13 2003 Mon 19.5555556 ARRN_ARRN Winter -## 6: 2003-01-13 2003 Mon 12.8000000 ARRN_ARRN Winter -\end{verbatim} - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{uniqDataDT <-}\StringTok{ }\NormalTok{drake}\OperatorTok{::}\KeywordTok{readd}\NormalTok{(uniqData) }\CommentTok{# readd the drake object} -\end{Highlighting} -\end{Shaded} - -Check data prep worked OK. - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{# check} -\NormalTok{t <-}\StringTok{ }\NormalTok{origDataDT[, .(}\DataTypeTok{nObs =}\NormalTok{ .N,} - \DataTypeTok{firstDate =} \KeywordTok{min}\NormalTok{(rDateTime),} - \DataTypeTok{lastDate =} \KeywordTok{max}\NormalTok{(rDateTime),} - \DataTypeTok{meankW =} \KeywordTok{mean}\NormalTok{(kW, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{)} -\NormalTok{), keyby =}\StringTok{ }\NormalTok{.(region, feeder_ID)]} - -\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(t, }\DataTypeTok{digits =} \DecValTok{2}\NormalTok{,} - \DataTypeTok{caption =} \StringTok{"Counts per feeder (long table)"}\NormalTok{) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} -\end{Highlighting} -\end{Shaded} - -Counts per feeder (long table) - -region - -feeder\_ID - -nObs - -firstDate - -lastDate - -meankW - -ARRN - -ARRN\_ARRN - -94909 - -2003-01-13 10:30:00 - -2017-10-25 22:15:00 - -151.74 - -BINS - -BINS\_C1T0 - -218480 - -2001-09-21 07:30:00 - -2017-10-13 23:45:00 - -94.70 - -BINS - -BINS\_C2T0 - -208447 - -2001-09-21 07:30:00 - -2017-10-13 23:45:00 - -93.91 - -BINS - -BINS\_E1L5 - -414980 - -2001-09-21 07:30:00 - -2017-10-13 23:15:00 - -94.31 - -BINS - -BINS\_E2L5 - -115260 - -2001-10-10 12:00:00 - -2017-10-04 20:45:00 - -20.58 - -BINS - -BINS\_E3L5 - -337064 - -2001-10-10 12:00:00 - -2017-10-14 23:30:00 - -59.67 - -FFPV - -FFPV\_FFPV - -32278 - -2014-09-25 09:15:00 - -2017-10-11 16:15:00 - -36.14 - -FRES - -FRES\_E1L5 - -452480 - -2001-10-10 12:00:00 - -2017-10-13 23:45:00 - -53.08 - -FRES - -FRES\_E1T0 - -188186 - -2001-09-11 15:15:00 - -2017-09-01 19:00:00 - -128.98 - -FRES - -FRES\_E2L5 - -178744 - -2001-10-10 12:00:00 - -2017-10-13 23:30:00 - -25.64 - -FRES - -FRES\_E2T0 - -164910 - -2001-09-11 15:15:00 - -2017-10-12 23:45:00 - -122.44 - -FRES - -FRES\_E3L5 - -463006 - -2001-10-10 12:00:00 - -2017-10-13 23:00:00 - -50.65 - -FRES - -FRES\_E4L5 - -15752 - -2010-07-30 17:00:00 - -2017-09-18 19:45:00 - -60.89 - -FRES - -FRES\_E6L5 - -317352 - -2001-09-11 15:15:00 - -2017-10-13 23:00:00 - -85.32 - -NEWP - -NEWP\_E11L5 - -367422 - -2005-01-20 10:00:00 - -2017-09-28 23:15:00 - -72.32 - -NEWP - -NEWP\_E13L5 - -252979 - -2010-01-01 00:15:00 - -2017-09-28 23:45:00 - -126.20 - -NEWP - -NEWP\_E15L5 - -295094 - -2008-01-07 12:00:00 - -2017-10-10 23:45:00 - -76.95 - -NEWP - -NEWP\_E17L5 - -63422 - -2011-03-10 12:45:00 - -2017-10-11 23:30:00 - -11.44 - -NEWP - -NEWP\_E19L5 - -126299 - -2011-03-14 09:45:00 - -2017-10-11 23:45:00 - -18.38 - -NEWP - -NEWP\_E1L5 - -318151 - -2001-10-10 12:15:00 - -2017-09-26 23:45:00 - -45.66 - -NEWP - -NEWP\_E1T0 - -101494 - -2001-09-11 15:30:00 - -2017-09-18 19:45:00 - -475.07 - -NEWP - -NEWP\_E2L5 - -67835 - -2001-09-11 15:30:00 - -2017-09-26 22:45:00 - -58.44 - -NEWP - -NEWP\_E2T0 - -399812 - -2001-10-10 12:15:00 - -2017-09-27 12:00:00 - -426.55 - -NEWP - -NEWP\_E3L5 - -480643 - -2001-10-10 12:15:00 - -2017-09-26 23:45:00 - -73.64 - -NEWP - -NEWP\_E3T0 - -246265 - -2005-08-03 11:15:00 - -2017-09-26 23:45:00 - -383.05 - -NEWP - -NEWP\_E4L5 - -191514 - -NA - -NA - -105.57 - -NEWP - -NEWP\_E5L5 - -448392 - -2001-09-11 15:15:00 - -2017-09-27 23:45:00 - -42.46 - -NEWP - -NEWP\_E6L5 - -434217 - -2001-09-11 15:30:00 - -2017-09-27 23:45:00 - -69.91 - -NEWP - -NEWP\_E7L5 - -306799 - -2001-10-10 12:15:00 - -2017-09-27 23:15:00 - -71.96 - -NEWP - -NEWP\_E8L5 - -537871 - -2001-10-10 12:15:00 - -2017-09-27 23:30:00 - -139.40 - -NEWP - -NEWP\_E9L5 - -363063 - -2002-12-19 22:30:00 - -2017-09-28 23:45:00 - -101.30 - -RYDE - -RYDE\_E1L5 - -356616 - -2001-09-21 09:30:00 - -2017-10-11 23:45:00 - -70.48 - -RYDE - -RYDE\_E1T0 \&E1S0 - -251062 - -2001-10-10 12:15:00 - -2017-10-11 23:30:00 - -336.55 - -RYDE - -RYDE\_E2L5 - -297293 - -NA - -NA - -71.14 - -RYDE - -RYDE\_E2T0 - -238332 - -2001-10-10 12:15:00 - -2017-10-11 23:45:00 - -351.26 - -RYDE - -RYDE\_E3L5 - -304293 - -2001-09-21 09:30:00 - -2017-10-11 23:45:00 - -85.22 - -RYDE - -RYDE\_E4L5 - -519366 - -NA - -NA - -70.23 - -RYDE - -RYDE\_E5L5 - -362368 - -2001-09-21 09:30:00 - -2017-10-12 23:15:00 - -82.05 - -RYDE - -RYDE\_E6L5 - -442859 - -2001-09-21 09:30:00 - -2017-10-12 23:45:00 - -96.24 - -RYDE - -RYDE\_E7L5 - -324195 - -2001-09-21 09:30:00 - -2017-10-12 22:45:00 - -69.86 - -RYDE - -RYDE\_E8L5 - -275373 - -2001-10-10 12:15:00 - -2017-10-12 23:15:00 - -57.04 - -RYDE - -RYDE\_E9L5 - -267617 - -2001-09-25 17:00:00 - -2017-10-12 23:30:00 - -59.20 - -SADO - -SADO\_E1L5 - -212775 - -2001-09-21 13:30:00 - -2017-10-25 23:15:00 - -50.98 - -SADO - -SADO\_E1T0 - -421960 - -2001-09-21 13:30:00 - -2017-10-25 23:45:00 - -230.66 - -SADO - -SADO\_E2L5 - -178715 - -2001-09-21 13:30:00 - -2017-10-25 23:15:00 - -39.74 - -SADO - -SADO\_E2T0 - -412191 - -2001-10-10 12:15:00 - -2017-10-25 23:30:00 - -173.51 - -SADO - -SADO\_E3L5 - -272831 - -2001-09-21 13:30:00 - -2017-10-25 23:15:00 - -64.61 - -SADO - -SADO\_E4L5 - -479020 - -2001-09-21 13:30:00 - -2017-10-25 23:45:00 - -58.38 - -SADO - -SADO\_E5L5 - -343918 - -2001-09-21 13:30:00 - -2017-10-25 23:45:00 - -82.67 - -SADO - -SADO\_E6L5 - -239227 - -2001-09-21 13:30:00 - -2017-10-25 23:30:00 - -56.34 - -SADO - -SADO\_E8L5 - -282455 - -2004-08-16 17:45:00 - -2017-10-25 23:30:00 - -89.57 - -SHAL - -SHAL\_C3L5 - -163204 - -2001-10-10 12:45:00 - -2017-10-15 23:15:00 - -38.22 - -SHAL - -SHAL\_C4L5 - -187940 - -2001-09-11 15:30:00 - -2017-10-15 23:45:00 - -38.77 - -SHAL - -SHAL\_C5L5 - -29417 - -2015-12-03 15:00:00 - -2017-10-15 23:45:00 - -36.35 - -SHAL - -SHAL\_E1L5 - -465913 - -2001-10-10 12:15:00 - -2017-10-14 23:30:00 - -70.65 - -SHAL - -SHAL\_E1T0 - -181132 - -2001-10-10 12:15:00 - -2017-10-14 23:15:00 - -101.23 - -SHAL - -SHAL\_E2L5 - -290286 - -2001-10-10 12:15:00 - -2017-10-15 23:00:00 - -47.09 - -SHAL - -SHAL\_E2T0 - -174129 - -2001-10-10 12:30:00 - -2017-10-14 22:45:00 - -107.44 - -SHAL - -SHAL\_E3L5 - -258805 - -2010-03-11 07:00:00 - -2017-10-15 23:45:00 - -33.26 - -SHAL - -SHAL\_E4L5 - -322135 - -2001-09-11 15:30:00 - -2017-10-16 12:30:00 - -54.03 - -SHAN - -SHAN\_E1L5 - -288894 - -2001-09-21 14:15:00 - -2017-10-24 23:15:00 - -63.52 - -SHAN - -SHAN\_E1T0 - -330691 - -2001-10-10 12:15:00 - -2017-10-24 23:45:00 - -226.58 - -SHAN - -SHAN\_E2L5 - -321760 - -2001-09-21 14:15:00 - -2017-10-25 23:15:00 - -72.63 - -SHAN - -SHAN\_E2T0 - -315053 - -2001-10-10 12:15:00 - -2017-10-24 23:45:00 - -186.69 - -SHAN - -SHAN\_E3L5 - -105606 - -2001-09-21 14:15:00 - -2017-10-25 23:15:00 - -26.30 - -SHAN - -SHAN\_E4L5 - -216626 - -2001-09-21 14:15:00 - -2017-10-25 23:30:00 - -33.63 - -SHAN - -SHAN\_E5L5 - -254742 - -2001-09-21 14:15:00 - -2017-10-25 23:15:00 - -48.50 - -SHAN - -SHAN\_E6L5 - -363107 - -2001-09-21 14:15:00 - -2017-10-25 23:15:00 - -68.69 - -SHAN - -SHAN\_E7L5 - -384165 - -2001-09-21 14:15:00 - -2017-10-25 23:15:00 - -66.12 - -SHAN - -SHAN\_E8L5 - -146605 - -2002-02-05 17:30:00 - -2017-10-25 23:15:00 - -25.28 - -VENT - -VENT\_E1L5 - -203617 - -2001-09-11 15:45:00 - -2017-10-15 23:15:00 - -33.24 - -VENT - -VENT\_E1T0 - -240745 - -2001-09-11 15:30:00 - -2017-10-15 23:15:00 - -191.42 - -VENT - -VENT\_E2L5 - -402307 - -2001-09-27 14:00:00 - -2017-10-16 23:45:00 - -46.68 - -VENT - -VENT\_E2T0 - -208020 - -2001-09-11 15:45:00 - -2017-10-15 23:30:00 - -115.47 - -VENT - -VENT\_E3L5 - -493337 - -2001-09-11 15:45:00 +## Loading required package: kableExtra +\end{verbatim} -2017-10-16 23:45:00 +\begin{verbatim} +## kableExtra +## TRUE +\end{verbatim} -83.59 +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{# Parameters ----} +\CommentTok{#dFile <- "~/Dropbox/Ben_IOW_SS.csv" # edit for your set up} -VENT -VENT\_E4L5 +\CommentTok{# Functions ----} +\CommentTok{# put more general ones that could be useful to everyone in /R so they are built into the package.} -387037 +\CommentTok{# put functions relevant to this analysis here} +\end{Highlighting} +\end{Shaded} -2001-09-11 15:45:00 +\section{Intro}\label{intro} -2017-10-16 23:30:00 +We have some electricity substation feeder data that has been cleaned to +give mean kW per 15 minutes. -40.86 +There seem to be some NA kW values and a lot of missing time stamps. We +want to select the `best' (i.e most complete) days within a +day-of-the-week/season/year sampling frame. If we can't do that we may +have to resort to seasonal mean kW profiles by hour \& day of the +week\ldots{} -VENT +Code used to generate this report: +\url{https://git.soton.ac.uk/ba1e12/spatialec/-/blob/master/isleOfWight/cleaningFeederData.Rmd} -VENT\_E5L5 +\section{Data prep}\label{data-prep} -481677 +\subsection{Load data}\label{load-data} -2001-09-27 14:00:00 +Loaded data from +/mnt/SERG\_data/Ellis\_IOW/Cleaned\_SS\_Amps/amps\_all\_substations.csv.gz\ldots{} +(using drake) -2017-10-16 23:45:00 +\begin{Shaded} +\begin{Highlighting}[] +\NormalTok{origDataDT <-}\StringTok{ }\NormalTok{drake}\OperatorTok{::}\KeywordTok{readd}\NormalTok{(origData) }\CommentTok{# readd the drake object} -88.43 +\NormalTok{uniqDataDT <-}\StringTok{ }\NormalTok{drake}\OperatorTok{::}\KeywordTok{readd}\NormalTok{(uniqData) }\CommentTok{# readd the drake object} -VENT +\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(}\KeywordTok{head}\NormalTok{(origDataDT), }\DataTypeTok{digits =} \DecValTok{2}\NormalTok{,} + \DataTypeTok{caption =} \StringTok{"Counts per feeder (long table)"}\NormalTok{) }\OperatorTok{%>%} +\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} +\end{Highlighting} +\end{Shaded} -VENT\_E6L5 +\begin{table} + +\caption{\label{tab:loadData}Counts per feeder (long table)} +\centering +\begin{tabular}[t]{l|l|l|l|l|l|r|l|r|l|l} +\hline +Time & region & sub\_region & rDateTime & rTime & rDate & rYear & rDoW & kW & feeder\_ID & season\\ +\hline +2003-01-13T10:30:00Z & ARRN & ARRN & 2003-01-13 10:30:00 & 10:30:00 & 2003-01-13 & 2003 & Mon & 2.00 & ARRN\_ARRN & Winter\\ +\hline +2003-01-13T10:45:00Z & ARRN & ARRN & 2003-01-13 10:45:00 & 10:45:00 & 2003-01-13 & 2003 & Mon & 18.25 & ARRN\_ARRN & Winter\\ +\hline +2003-01-13T11:15:00Z & ARRN & ARRN & 2003-01-13 11:15:00 & 11:15:00 & 2003-01-13 & 2003 & Mon & 0.67 & ARRN\_ARRN & Winter\\ +\hline +2003-01-13T11:30:00Z & ARRN & ARRN & 2003-01-13 11:30:00 & 11:30:00 & 2003-01-13 & 2003 & Mon & 28.50 & ARRN\_ARRN & Winter\\ +\hline +2003-01-13T11:45:00Z & ARRN & ARRN & 2003-01-13 11:45:00 & 11:45:00 & 2003-01-13 & 2003 & Mon & 19.56 & ARRN\_ARRN & Winter\\ +\hline +2003-01-13T12:15:00Z & ARRN & ARRN & 2003-01-13 12:15:00 & 12:15:00 & 2003-01-13 & 2003 & Mon & 12.80 & ARRN\_ARRN & Winter\\ +\hline +\end{tabular} +\end{table} -6631 +Check data prep worked OK. -2001-09-27 14:00:00 +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{# check} +\NormalTok{t <-}\StringTok{ }\NormalTok{origDataDT[, .(}\DataTypeTok{nObs =}\NormalTok{ .N,} + \DataTypeTok{firstDate =} \KeywordTok{min}\NormalTok{(rDateTime, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{),} + \DataTypeTok{lastDate =} \KeywordTok{max}\NormalTok{(rDateTime, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{),} + \DataTypeTok{meankW =} \KeywordTok{mean}\NormalTok{(kW, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{)} +\NormalTok{), keyby =}\StringTok{ }\NormalTok{.(region, feeder_ID)]} -2017-10-24 20:15:00 +\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(t, }\DataTypeTok{digits =} \DecValTok{2}\NormalTok{,} + \DataTypeTok{caption =} \StringTok{"Counts per feeder (long table)"}\NormalTok{) }\OperatorTok{%>%} +\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} +\end{Highlighting} +\end{Shaded} -3.95 +\begin{table} + +\caption{\label{tab:dataPrep}Counts per feeder (long table)} +\centering +\begin{tabular}[t]{l|l|r|l|l|r} +\hline +region & feeder\_ID & nObs & firstDate & lastDate & meankW\\ +\hline +ARRN & ARRN\_ARRN & 94909 & 2003-01-13 10:30:00 & 2017-10-25 22:15:00 & 151.74\\ +\hline +BINS & BINS\_C1T0 & 218480 & 2001-09-21 07:30:00 & 2017-10-13 23:45:00 & 94.70\\ +\hline +BINS & BINS\_C2T0 & 208447 & 2001-09-21 07:30:00 & 2017-10-13 23:45:00 & 93.91\\ +\hline +BINS & BINS\_E1L5 & 414980 & 2001-09-21 07:30:00 & 2017-10-13 23:15:00 & 94.31\\ +\hline +BINS & BINS\_E2L5 & 115260 & 2001-10-10 12:00:00 & 2017-10-04 20:45:00 & 20.58\\ +\hline +BINS & BINS\_E3L5 & 337064 & 2001-10-10 12:00:00 & 2017-10-14 23:30:00 & 59.67\\ +\hline +FFPV & FFPV\_FFPV & 32278 & 2014-09-25 09:15:00 & 2017-10-11 16:15:00 & 36.14\\ +\hline +FRES & FRES\_E1L5 & 452480 & 2001-10-10 12:00:00 & 2017-10-13 23:45:00 & 53.08\\ +\hline +FRES & FRES\_E1T0 & 188186 & 2001-09-11 15:15:00 & 2017-09-01 19:00:00 & 128.98\\ +\hline +FRES & FRES\_E2L5 & 178744 & 2001-10-10 12:00:00 & 2017-10-13 23:30:00 & 25.64\\ +\hline +FRES & FRES\_E2T0 & 164910 & 2001-09-11 15:15:00 & 2017-10-12 23:45:00 & 122.44\\ +\hline +FRES & FRES\_E3L5 & 463006 & 2001-10-10 12:00:00 & 2017-10-13 23:00:00 & 50.65\\ +\hline +FRES & FRES\_E4L5 & 15752 & 2010-07-30 17:00:00 & 2017-09-18 19:45:00 & 60.89\\ +\hline +FRES & FRES\_E6L5 & 317352 & 2001-09-11 15:15:00 & 2017-10-13 23:00:00 & 85.32\\ +\hline +NEWP & NEWP\_E11L5 & 367422 & 2005-01-20 10:00:00 & 2017-09-28 23:15:00 & 72.32\\ +\hline +NEWP & NEWP\_E13L5 & 252979 & 2010-01-01 00:15:00 & 2017-09-28 23:45:00 & 126.20\\ +\hline +NEWP & NEWP\_E15L5 & 295094 & 2008-01-07 12:00:00 & 2017-10-10 23:45:00 & 76.95\\ +\hline +NEWP & NEWP\_E17L5 & 63422 & 2011-03-10 12:45:00 & 2017-10-11 23:30:00 & 11.44\\ +\hline +NEWP & NEWP\_E19L5 & 126299 & 2011-03-14 09:45:00 & 2017-10-11 23:45:00 & 18.38\\ +\hline +NEWP & NEWP\_E1L5 & 318151 & 2001-10-10 12:15:00 & 2017-09-26 23:45:00 & 45.66\\ +\hline +NEWP & NEWP\_E1T0 & 101494 & 2001-09-11 15:30:00 & 2017-09-18 19:45:00 & 475.07\\ +\hline +NEWP & NEWP\_E2L5 & 67835 & 2001-09-11 15:30:00 & 2017-09-26 22:45:00 & 58.44\\ +\hline +NEWP & NEWP\_E2T0 & 399812 & 2001-10-10 12:15:00 & 2017-09-27 12:00:00 & 426.55\\ +\hline +NEWP & NEWP\_E3L5 & 480643 & 2001-10-10 12:15:00 & 2017-09-26 23:45:00 & 73.64\\ +\hline +NEWP & NEWP\_E3T0 & 246265 & 2005-08-03 11:15:00 & 2017-09-26 23:45:00 & 383.05\\ +\hline +NEWP & NEWP\_E4L5 & 191514 & 2001-09-11 15:15:00 & 2020-12-31 07:15:00 & 105.57\\ +\hline +NEWP & NEWP\_E5L5 & 448392 & 2001-09-11 15:15:00 & 2017-09-27 23:45:00 & 42.46\\ +\hline +NEWP & NEWP\_E6L5 & 434217 & 2001-09-11 15:30:00 & 2017-09-27 23:45:00 & 69.91\\ +\hline +NEWP & NEWP\_E7L5 & 306799 & 2001-10-10 12:15:00 & 2017-09-27 23:15:00 & 71.96\\ +\hline +NEWP & NEWP\_E8L5 & 537871 & 2001-10-10 12:15:00 & 2017-09-27 23:30:00 & 139.40\\ +\hline +NEWP & NEWP\_E9L5 & 363063 & 2002-12-19 22:30:00 & 2017-09-28 23:45:00 & 101.30\\ +\hline +RYDE & RYDE\_E1L5 & 356616 & 2001-09-21 09:30:00 & 2017-10-11 23:45:00 & 70.48\\ +\hline +RYDE & RYDE\_E1T0 \&E1S0 & 251062 & 2001-10-10 12:15:00 & 2017-10-11 23:30:00 & 336.55\\ +\hline +RYDE & RYDE\_E2L5 & 297293 & 2001-09-21 09:30:00 & 2017-10-11 23:45:00 & 71.14\\ +\hline +RYDE & RYDE\_E2T0 & 238332 & 2001-10-10 12:15:00 & 2017-10-11 23:45:00 & 351.26\\ +\hline +RYDE & RYDE\_E3L5 & 304293 & 2001-09-21 09:30:00 & 2017-10-11 23:45:00 & 85.22\\ +\hline +RYDE & RYDE\_E4L5 & 519366 & 2001-12-20 15:30:00 & 2017-10-12 23:45:00 & 70.23\\ +\hline +RYDE & RYDE\_E5L5 & 362368 & 2001-09-21 09:30:00 & 2017-10-12 23:15:00 & 82.05\\ +\hline +RYDE & RYDE\_E6L5 & 442859 & 2001-09-21 09:30:00 & 2017-10-12 23:45:00 & 96.24\\ +\hline +RYDE & RYDE\_E7L5 & 324195 & 2001-09-21 09:30:00 & 2017-10-12 22:45:00 & 69.86\\ +\hline +RYDE & RYDE\_E8L5 & 275373 & 2001-10-10 12:15:00 & 2017-10-12 23:15:00 & 57.04\\ +\hline +RYDE & RYDE\_E9L5 & 267617 & 2001-09-25 17:00:00 & 2017-10-12 23:30:00 & 59.20\\ +\hline +SADO & SADO\_E1L5 & 212775 & 2001-09-21 13:30:00 & 2017-10-25 23:15:00 & 50.98\\ +\hline +SADO & SADO\_E1T0 & 421960 & 2001-09-21 13:30:00 & 2017-10-25 23:45:00 & 230.66\\ +\hline +SADO & SADO\_E2L5 & 178715 & 2001-09-21 13:30:00 & 2017-10-25 23:15:00 & 39.74\\ +\hline +SADO & SADO\_E2T0 & 412191 & 2001-10-10 12:15:00 & 2017-10-25 23:30:00 & 173.51\\ +\hline +SADO & SADO\_E3L5 & 272831 & 2001-09-21 13:30:00 & 2017-10-25 23:15:00 & 64.61\\ +\hline +SADO & SADO\_E4L5 & 479020 & 2001-09-21 13:30:00 & 2017-10-25 23:45:00 & 58.38\\ +\hline +SADO & SADO\_E5L5 & 343918 & 2001-09-21 13:30:00 & 2017-10-25 23:45:00 & 82.67\\ +\hline +SADO & SADO\_E6L5 & 239227 & 2001-09-21 13:30:00 & 2017-10-25 23:30:00 & 56.34\\ +\hline +SADO & SADO\_E8L5 & 282455 & 2004-08-16 17:45:00 & 2017-10-25 23:30:00 & 89.57\\ +\hline +SHAL & SHAL\_C3L5 & 163204 & 2001-10-10 12:45:00 & 2017-10-15 23:15:00 & 38.22\\ +\hline +SHAL & SHAL\_C4L5 & 187940 & 2001-09-11 15:30:00 & 2017-10-15 23:45:00 & 38.77\\ +\hline +SHAL & SHAL\_C5L5 & 29417 & 2015-12-03 15:00:00 & 2017-10-15 23:45:00 & 36.35\\ +\hline +SHAL & SHAL\_E1L5 & 465913 & 2001-10-10 12:15:00 & 2017-10-14 23:30:00 & 70.65\\ +\hline +SHAL & SHAL\_E1T0 & 181132 & 2001-10-10 12:15:00 & 2017-10-14 23:15:00 & 101.23\\ +\hline +SHAL & SHAL\_E2L5 & 290286 & 2001-10-10 12:15:00 & 2017-10-15 23:00:00 & 47.09\\ +\hline +SHAL & SHAL\_E2T0 & 174129 & 2001-10-10 12:30:00 & 2017-10-14 22:45:00 & 107.44\\ +\hline +SHAL & SHAL\_E3L5 & 258805 & 2010-03-11 07:00:00 & 2017-10-15 23:45:00 & 33.26\\ +\hline +SHAL & SHAL\_E4L5 & 322135 & 2001-09-11 15:30:00 & 2017-10-16 12:30:00 & 54.03\\ +\hline +SHAN & SHAN\_E1L5 & 288894 & 2001-09-21 14:15:00 & 2017-10-24 23:15:00 & 63.52\\ +\hline +SHAN & SHAN\_E1T0 & 330691 & 2001-10-10 12:15:00 & 2017-10-24 23:45:00 & 226.58\\ +\hline +SHAN & SHAN\_E2L5 & 321760 & 2001-09-21 14:15:00 & 2017-10-25 23:15:00 & 72.63\\ +\hline +SHAN & SHAN\_E2T0 & 315053 & 2001-10-10 12:15:00 & 2017-10-24 23:45:00 & 186.69\\ +\hline +SHAN & SHAN\_E3L5 & 105606 & 2001-09-21 14:15:00 & 2017-10-25 23:15:00 & 26.30\\ +\hline +SHAN & SHAN\_E4L5 & 216626 & 2001-09-21 14:15:00 & 2017-10-25 23:30:00 & 33.63\\ +\hline +SHAN & SHAN\_E5L5 & 254742 & 2001-09-21 14:15:00 & 2017-10-25 23:15:00 & 48.50\\ +\hline +SHAN & SHAN\_E6L5 & 363107 & 2001-09-21 14:15:00 & 2017-10-25 23:15:00 & 68.69\\ +\hline +SHAN & SHAN\_E7L5 & 384165 & 2001-09-21 14:15:00 & 2017-10-25 23:15:00 & 66.12\\ +\hline +SHAN & SHAN\_E8L5 & 146605 & 2002-02-05 17:30:00 & 2017-10-25 23:15:00 & 25.28\\ +\hline +VENT & VENT\_E1L5 & 203617 & 2001-09-11 15:45:00 & 2017-10-15 23:15:00 & 33.24\\ +\hline +VENT & VENT\_E1T0 & 240745 & 2001-09-11 15:30:00 & 2017-10-15 23:15:00 & 191.42\\ +\hline +VENT & VENT\_E2L5 & 402307 & 2001-09-27 14:00:00 & 2017-10-16 23:45:00 & 46.68\\ +\hline +VENT & VENT\_E2T0 & 208020 & 2001-09-11 15:45:00 & 2017-10-15 23:30:00 & 115.47\\ +\hline +VENT & VENT\_E3L5 & 493337 & 2001-09-11 15:45:00 & 2017-10-16 23:45:00 & 83.59\\ +\hline +VENT & VENT\_E4L5 & 387037 & 2001-09-11 15:45:00 & 2017-10-16 23:30:00 & 40.86\\ +\hline +VENT & VENT\_E5L5 & 481677 & 2001-09-27 14:00:00 & 2017-10-16 23:45:00 & 88.43\\ +\hline +VENT & VENT\_E6L5 & 6631 & 2001-09-27 14:00:00 & 2017-10-24 20:15:00 & 3.95\\ +\hline +\end{tabular} +\end{table} Do a duplicate check by feeder\_ID, dateTime \& kW. In theory there should not be any. @@ -1189,7 +429,7 @@ should not be any. \NormalTok{pc <-}\StringTok{ }\DecValTok{100}\OperatorTok{*}\NormalTok{((}\KeywordTok{nrow}\NormalTok{(origDataDT) }\OperatorTok{-}\StringTok{ }\KeywordTok{nrow}\NormalTok{(uniqDataDT))}\OperatorTok{/}\KeywordTok{nrow}\NormalTok{(origDataDT))} \KeywordTok{message}\NormalTok{(}\StringTok{"That's "}\NormalTok{, }\KeywordTok{round}\NormalTok{(pc,}\DecValTok{2}\NormalTok{), }\StringTok{"%"}\NormalTok{)} -\NormalTok{feederDT <-}\StringTok{ }\NormalTok{uniqDataDT }\CommentTok{# use dt with no duplicates} +\NormalTok{feederDT <-}\StringTok{ }\NormalTok{uniqDataDT[}\OperatorTok{!}\KeywordTok{is.na}\NormalTok{(rDateTime)] }\CommentTok{# use dt with no duplicates} \NormalTok{origDataDT <-}\StringTok{ }\OtherTok{NULL} \CommentTok{# save memory} \end{Highlighting} \end{Shaded} @@ -1325,18 +565,18 @@ and then seeing if the dateTimes are contiguous. \begin{verbatim} ## rDateTime rTime rDate -## Min. :2001-09-11 15:15:00 Length:549530 Min. :2001-09-11 +## Min. :2001-09-11 15:15:00 Length:549529 Min. :2001-09-11 ## 1st Qu.:2006-02-13 01:30:00 Class1:hms 1st Qu.:2006-02-13 ## Median :2010-01-20 06:00:00 Class2:difftime Median :2010-01-20 ## Mean :2010-01-05 16:51:28 Mode :numeric Mean :2010-01-05 ## 3rd Qu.:2013-12-22 21:30:00 3rd Qu.:2013-12-22 ## Max. :2020-12-31 07:15:00 Max. :2020-12-31 -## NA's :1 NA's :1 +## ## season nFeeders meankW dtDiff -## Spring:137919 Min. : 1.00 Min. : 0.00 Length:549530 -## Summer:132245 1st Qu.:31.00 1st Qu.: 80.40 Class :difftime +## Spring:137919 Min. : 1.00 Min. : 0.00 Length:549529 +## Summer:132245 1st Qu.:31.00 1st Qu.: 80.41 Class :difftime ## Autumn:141490 Median :39.00 Median : 96.95 Mode :numeric -## Winter:137876 Mean :39.72 Mean : 97.00 +## Winter:137875 Mean :39.72 Mean : 97.00 ## 3rd Qu.:47.00 3rd Qu.:113.00 ## Max. :77.00 Max. :439.56 ## NA's :1 @@ -1443,95 +683,38 @@ infer how many feeders are reporting: \begin{Shaded} \begin{Highlighting}[] \NormalTok{wDT <-}\StringTok{ }\NormalTok{drake}\OperatorTok{::}\KeywordTok{readd}\NormalTok{(wideData) }\CommentTok{# back from the drake} -\KeywordTok{head}\NormalTok{(wDT)} +\KeywordTok{names}\NormalTok{(wDT)} \end{Highlighting} \end{Shaded} \begin{verbatim} -## rDateTime ARRN_ARRN BINS_C1T0 BINS_C2T0 BINS_E1L5 BINS_E2L5 -## 1: 2001-09-11 15:15:00 NA NA NA NA NA -## 2: 2001-09-11 15:30:00 NA NA NA NA NA -## 3: 2001-09-11 15:45:00 NA NA NA NA NA -## 4: 2001-09-21 07:30:00 NA 0 0 0 NA -## 5: 2001-09-21 08:00:00 NA 0 0 0 NA -## 6: 2001-09-21 08:30:00 NA NA NA NA NA -## BINS_E3L5 FFPV_FFPV FRES_E1L5 FRES_E1T0 FRES_E2L5 FRES_E2T0 FRES_E3L5 -## 1: NA NA NA 0 NA 0 NA -## 2: NA NA NA NA NA NA NA -## 3: NA NA NA NA NA NA NA -## 4: NA NA NA NA NA NA NA -## 5: NA NA NA NA NA NA NA -## 6: NA NA NA 0 NA NA NA -## FRES_E4L5 FRES_E6L5 NEWP_E11L5 NEWP_E13L5 NEWP_E15L5 NEWP_E17L5 NEWP_E19L5 -## 1: NA 0 NA NA NA NA NA -## 2: NA NA NA NA NA NA NA -## 3: NA NA NA NA NA NA NA -## 4: NA NA NA NA NA NA NA -## 5: NA NA NA NA NA NA NA -## 6: NA 0 NA NA NA NA NA -## NEWP_E1L5 NEWP_E1T0 NEWP_E2L5 NEWP_E2T0 NEWP_E3L5 NEWP_E3T0 NEWP_E4L5 -## 1: NA NA NA NA NA NA 0 -## 2: NA 0 0 NA NA NA NA -## 3: NA NA NA NA NA NA NA -## 4: NA NA NA NA NA NA NA -## 5: NA NA NA NA NA NA NA -## 6: NA NA NA NA NA NA NA -## NEWP_E5L5 NEWP_E6L5 NEWP_E7L5 NEWP_E8L5 NEWP_E9L5 RYDE_E1L5 RYDE_E1T0 &E1S0 -## 1: 0 NA NA NA NA NA NA -## 2: NA 0 NA NA NA NA NA -## 3: NA NA NA NA NA NA NA -## 4: NA NA NA NA NA NA NA -## 5: NA NA NA NA NA NA NA -## 6: NA NA NA NA NA NA NA -## RYDE_E2L5 RYDE_E2T0 RYDE_E3L5 RYDE_E4L5 RYDE_E5L5 RYDE_E6L5 RYDE_E7L5 -## 1: NA NA NA NA NA NA NA -## 2: NA NA NA NA NA NA NA -## 3: NA NA NA NA NA NA NA -## 4: NA NA NA NA NA NA NA -## 5: NA NA NA NA NA NA NA -## 6: NA NA NA NA NA NA NA -## RYDE_E8L5 RYDE_E9L5 SADO_E1L5 SADO_E1T0 SADO_E2L5 SADO_E2T0 SADO_E3L5 -## 1: NA NA NA NA NA NA NA -## 2: NA NA NA NA NA NA NA -## 3: NA NA NA NA NA NA NA -## 4: NA NA NA NA NA NA NA -## 5: NA NA NA NA NA NA NA -## 6: NA NA NA NA NA NA NA -## SADO_E4L5 SADO_E5L5 SADO_E6L5 SADO_E8L5 SHAL_C3L5 SHAL_C4L5 SHAL_C5L5 -## 1: NA NA NA NA NA NA NA -## 2: NA NA NA NA NA 0 NA -## 3: NA NA NA NA NA NA NA -## 4: NA NA NA NA NA NA NA -## 5: NA NA NA NA NA NA NA -## 6: NA NA NA NA NA NA NA -## SHAL_E1L5 SHAL_E1T0 SHAL_E2L5 SHAL_E2T0 SHAL_E3L5 SHAL_E4L5 SHAN_E1L5 -## 1: NA NA NA NA NA NA NA -## 2: NA NA NA NA NA 0 NA -## 3: NA NA NA NA NA NA NA -## 4: NA NA NA NA NA NA NA -## 5: NA NA NA NA NA NA NA -## 6: NA NA NA NA NA NA NA -## SHAN_E1T0 SHAN_E2L5 SHAN_E2T0 SHAN_E3L5 SHAN_E4L5 SHAN_E5L5 SHAN_E6L5 -## 1: NA NA NA NA NA NA NA -## 2: NA NA NA NA NA NA NA -## 3: NA NA NA NA NA NA NA -## 4: NA NA NA NA NA NA NA -## 5: NA NA NA NA NA NA NA -## 6: NA NA NA NA NA NA NA -## SHAN_E7L5 SHAN_E8L5 VENT_E1L5 VENT_E1T0 VENT_E2L5 VENT_E2T0 VENT_E3L5 -## 1: NA NA NA NA NA NA NA -## 2: NA NA NA 0 NA NA NA -## 3: NA NA 0 0 NA 0 0 -## 4: NA NA NA NA NA NA NA -## 5: NA NA NA NA NA NA NA -## 6: NA NA NA NA NA NA NA -## VENT_E4L5 VENT_E5L5 VENT_E6L5 nNA nFeedersReporting -## 1: NA NA NA 73 5 -## 2: NA NA NA 72 6 -## 3: 0 NA NA 73 5 -## 4: NA NA NA 75 3 -## 5: NA NA NA 75 3 -## 6: NA NA NA 76 2 +## [1] "rDateTime" "ARRN_ARRN" "BINS_C1T0" +## [4] "BINS_C2T0" "BINS_E1L5" "BINS_E2L5" +## [7] "BINS_E3L5" "FFPV_FFPV" "FRES_E1L5" +## [10] "FRES_E1T0" "FRES_E2L5" "FRES_E2T0" +## [13] "FRES_E3L5" "FRES_E4L5" "FRES_E6L5" +## [16] "NEWP_E11L5" "NEWP_E13L5" "NEWP_E15L5" +## [19] "NEWP_E17L5" "NEWP_E19L5" "NEWP_E1L5" +## [22] "NEWP_E1T0" "NEWP_E2L5" "NEWP_E2T0" +## [25] "NEWP_E3L5" "NEWP_E3T0" "NEWP_E4L5" +## [28] "NEWP_E5L5" "NEWP_E6L5" "NEWP_E7L5" +## [31] "NEWP_E8L5" "NEWP_E9L5" "RYDE_E1L5" +## [34] "RYDE_E1T0 &E1S0" "RYDE_E2L5" "RYDE_E2T0" +## [37] "RYDE_E3L5" "RYDE_E4L5" "RYDE_E5L5" +## [40] "RYDE_E6L5" "RYDE_E7L5" "RYDE_E8L5" +## [43] "RYDE_E9L5" "SADO_E1L5" "SADO_E1T0" +## [46] "SADO_E2L5" "SADO_E2T0" "SADO_E3L5" +## [49] "SADO_E4L5" "SADO_E5L5" "SADO_E6L5" +## [52] "SADO_E8L5" "SHAL_C3L5" "SHAL_C4L5" +## [55] "SHAL_C5L5" "SHAL_E1L5" "SHAL_E1T0" +## [58] "SHAL_E2L5" "SHAL_E2T0" "SHAL_E3L5" +## [61] "SHAL_E4L5" "SHAN_E1L5" "SHAN_E1T0" +## [64] "SHAN_E2L5" "SHAN_E2T0" "SHAN_E3L5" +## [67] "SHAN_E4L5" "SHAN_E5L5" "SHAN_E6L5" +## [70] "SHAN_E7L5" "SHAN_E8L5" "VENT_E1L5" +## [73] "VENT_E1T0" "VENT_E2L5" "VENT_E2T0" +## [76] "VENT_E3L5" "VENT_E4L5" "VENT_E5L5" +## [79] "VENT_E6L5" "nNA" "nFeedersReporting" \end{verbatim} If we take the mean of the number of feeders reporting per day (date) @@ -1581,7 +764,8 @@ then a value of 25 will indicate a day when \emph{all} feeders have \begin{Shaded} \begin{Highlighting}[] \KeywordTok{message}\NormalTok{(}\StringTok{"How many days have 100%?"}\NormalTok{)} -\KeywordTok{nrow}\NormalTok{(aggDT[propExpected }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{])} +\NormalTok{n <-}\StringTok{ }\KeywordTok{nrow}\NormalTok{(aggDT[propExpected }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{])} +\NormalTok{n} \end{Highlighting} \end{Shaded} @@ -1589,6 +773,8 @@ then a value of 25 will indicate a day when \emph{all} feeders have ## [1] 0 \end{verbatim} +So, there are 0 days with 100\% data\ldots{} + If we plot the mean then we will see which days get closest to having a full dataset. @@ -1612,6 +798,135 @@ feeders * 24 hours * 4 per hour (will be the same shape): \includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/bestDaysProp-1.pdf} +\begin{Shaded} +\begin{Highlighting}[] +\NormalTok{aggDT[, rDoW }\OperatorTok{:}\ErrorTok{=}\StringTok{ }\NormalTok{lubridate}\OperatorTok{::}\KeywordTok{wday}\NormalTok{(rDate, }\DataTypeTok{lab =} \OtherTok{TRUE}\NormalTok{)]} +\NormalTok{h <-}\StringTok{ }\KeywordTok{head}\NormalTok{(aggDT[season }\OperatorTok{==}\StringTok{ "Spring"}\NormalTok{][}\KeywordTok{order}\NormalTok{(}\OperatorTok{-}\NormalTok{propExpected)])} +\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(h, }\DataTypeTok{caption =} \StringTok{"Best Spring days overall"}\NormalTok{, } + \DataTypeTok{digits =} \DecValTok{3}\NormalTok{) }\OperatorTok{%>%} +\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} +\end{Highlighting} +\end{Shaded} + +\begin{table} + +\caption{\label{tab:bestDaysProp}Best Spring days overall} +\centering +\begin{tabular}[t]{l|l|r|r|r|r|r|l} +\hline +rDate & season & meanOK & minOk & maxOk & sumOK & propExpected & rDoW\\ +\hline +2002-04-14 & Spring & 63.490 & 60 & 65 & 6095 & 0.814 & Sun\\ +\hline +2003-03-20 & Spring & 63.458 & 61 & 67 & 6092 & 0.814 & Thu\\ +\hline +2002-03-21 & Spring & 63.385 & 62 & 65 & 6085 & 0.813 & Thu\\ +\hline +2003-03-14 & Spring & 63.385 & 61 & 65 & 6085 & 0.813 & Fri\\ +\hline +2003-03-16 & Spring & 63.375 & 60 & 65 & 6084 & 0.812 & Sun\\ +\hline +2003-03-17 & Spring & 63.375 & 61 & 65 & 6084 & 0.812 & Mon\\ +\hline +\end{tabular} +\end{table} + +\begin{Shaded} +\begin{Highlighting}[] +\NormalTok{h <-}\StringTok{ }\KeywordTok{head}\NormalTok{(aggDT[season }\OperatorTok{==}\StringTok{ "Summer"}\NormalTok{][}\KeywordTok{order}\NormalTok{(}\OperatorTok{-}\NormalTok{propExpected)])} +\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(h, }\DataTypeTok{caption =} \StringTok{"Best Summer days overall"}\NormalTok{, } + \DataTypeTok{digits =} \DecValTok{3}\NormalTok{) }\OperatorTok{%>%} +\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} +\end{Highlighting} +\end{Shaded} + +\begin{table} + +\caption{\label{tab:bestDaysProp}Best Summer days overall} +\centering +\begin{tabular}[t]{l|l|r|r|r|r|r|l} +\hline +rDate & season & meanOK & minOk & maxOk & sumOK & propExpected & rDoW\\ +\hline +2003-08-22 & Summer & 63.854 & 56 & 65 & 6130 & 0.819 & Fri\\ +\hline +2003-08-30 & Summer & 63.844 & 60 & 66 & 6129 & 0.819 & Sat\\ +\hline +2003-08-31 & Summer & 63.812 & 59 & 66 & 6126 & 0.818 & Sun\\ +\hline +2003-08-23 & Summer & 63.677 & 56 & 66 & 6113 & 0.816 & Sat\\ +\hline +2003-08-25 & Summer & 63.677 & 55 & 66 & 6113 & 0.816 & Mon\\ +\hline +2003-08-26 & Summer & 63.656 & 58 & 66 & 6111 & 0.816 & Tue\\ +\hline +\end{tabular} +\end{table} + +\begin{Shaded} +\begin{Highlighting}[] +\NormalTok{h <-}\StringTok{ }\KeywordTok{head}\NormalTok{(aggDT[season }\OperatorTok{==}\StringTok{ "Autumn"}\NormalTok{][}\KeywordTok{order}\NormalTok{(}\OperatorTok{-}\NormalTok{propExpected)])} +\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(h, }\DataTypeTok{caption =} \StringTok{"Best Autumn days overall"}\NormalTok{,} + \DataTypeTok{digits =} \DecValTok{3}\NormalTok{) }\OperatorTok{%>%} +\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} +\end{Highlighting} +\end{Shaded} + +\begin{table} + +\caption{\label{tab:bestDaysProp}Best Autumn days overall} +\centering +\begin{tabular}[t]{l|l|r|r|r|r|r|l} +\hline +rDate & season & meanOK & minOk & maxOk & sumOK & propExpected & rDoW\\ +\hline +2003-09-02 & Autumn & 63.823 & 57 & 66 & 6127 & 0.818 & Tue\\ +\hline +2003-09-01 & Autumn & 63.771 & 56 & 65 & 6122 & 0.818 & Mon\\ +\hline +2003-09-07 & Autumn & 63.740 & 57 & 66 & 6119 & 0.817 & Sun\\ +\hline +2003-09-03 & Autumn & 63.667 & 57 & 65 & 6112 & 0.816 & Wed\\ +\hline +2003-09-04 & Autumn & 63.615 & 57 & 66 & 6107 & 0.816 & Thu\\ +\hline +2003-09-06 & Autumn & 63.552 & 57 & 65 & 6101 & 0.815 & Sat\\ +\hline +\end{tabular} +\end{table} + +\begin{Shaded} +\begin{Highlighting}[] +\NormalTok{h <-}\StringTok{ }\KeywordTok{head}\NormalTok{(aggDT[season }\OperatorTok{==}\StringTok{ "Winter"}\NormalTok{][}\KeywordTok{order}\NormalTok{(}\OperatorTok{-}\NormalTok{propExpected)])} +\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(h, }\DataTypeTok{caption =} \StringTok{"Best Winter days overall"}\NormalTok{, } + \DataTypeTok{digits =} \DecValTok{3}\NormalTok{) }\OperatorTok{%>%} +\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} +\end{Highlighting} +\end{Shaded} + +\begin{table} + +\caption{\label{tab:bestDaysProp}Best Winter days overall} +\centering +\begin{tabular}[t]{l|l|r|r|r|r|r|l} +\hline +rDate & season & meanOK & minOk & maxOk & sumOK & propExpected & rDoW\\ +\hline +2002-02-28 & Winter & 63.292 & 60 & 65 & 6076 & 0.811 & Thu\\ +\hline +2002-02-25 & Winter & 63.125 & 61 & 65 & 6060 & 0.809 & Mon\\ +\hline +2002-12-11 & Winter & 62.979 & 61 & 64 & 6046 & 0.807 & Wed\\ +\hline +2002-12-01 & Winter & 62.917 & 61 & 64 & 6040 & 0.807 & Sun\\ +\hline +2003-01-01 & Winter & 62.906 & 60 & 64 & 6039 & 0.806 & Wed\\ +\hline +2003-01-03 & Winter & 62.906 & 60 & 64 & 6039 & 0.806 & Fri\\ +\hline +\end{tabular} +\end{table} + This also tells us that there is some reason why we get fluctations in the number of data points per hour after 2003. @@ -1621,7 +936,7 @@ So there are no days with 100\% data. We need a different approach. \section{Runtime}\label{runtime} -Analysis completed in 211.44 seconds ( 3.52 minutes) using +Analysis completed in 221.35 seconds ( 3.69 minutes) using \href{https://cran.r-project.org/package=knitr}{knitr} in \href{http://www.rstudio.com}{RStudio} with R version 3.6.0 (2019-04-26) running on x86\_64-redhat-linux-gnu. @@ -1686,14 +1001,14 @@ running on x86\_64-redhat-linux-gnu. ## [17] glue_1.4.1 withr_2.1.2 lifecycle_0.2.0 stringr_1.4.0 ## [21] munsell_0.5.0 gtable_0.2.0 rvest_0.3.5 evaluate_0.14 ## [25] labeling_0.3 knitr_1.28 parallel_3.6.0 fansi_0.4.0 -## [29] highr_0.7 Rcpp_1.0.1 readr_1.3.1 scales_1.0.0 -## [33] backports_1.1.3 filelock_1.0.2 webshot_0.5.2 jsonlite_1.6 -## [37] digest_0.6.25 stringi_1.2.4 dplyr_1.0.0 grid_3.6.0 -## [41] rprojroot_1.3-2 cli_2.0.2 tools_3.6.0 magrittr_1.5 -## [45] base64url_1.4 tibble_3.0.1 crayon_1.3.4 pkgconfig_2.0.2 -## [49] ellipsis_0.3.1 xml2_1.3.2 prettyunits_1.0.2 httr_1.4.1 -## [53] assertthat_0.2.0 rmarkdown_2.2 rstudioapi_0.11 R6_2.3.0 -## [57] igraph_1.2.2 compiler_3.6.0 +## [29] Rcpp_1.0.1 readr_1.3.1 scales_1.0.0 backports_1.1.3 +## [33] filelock_1.0.2 webshot_0.5.2 jsonlite_1.6 digest_0.6.25 +## [37] stringi_1.2.4 dplyr_1.0.0 grid_3.6.0 rprojroot_1.3-2 +## [41] cli_2.0.2 tools_3.6.0 magrittr_1.5 base64url_1.4 +## [45] tibble_3.0.1 crayon_1.3.4 pkgconfig_2.0.2 ellipsis_0.3.1 +## [49] xml2_1.3.2 prettyunits_1.0.2 httr_1.4.1 assertthat_0.2.0 +## [53] rmarkdown_2.2 rstudioapi_0.11 R6_2.3.0 igraph_1.2.2 +## [57] compiler_3.6.0 \end{verbatim} \section{The raw data cleaning code}\label{the-raw-data-cleaning-code} diff --git a/make_cleanFeeders.R b/make_cleanFeeders.R index e36214e073466d8a1d21e55353f523f3306c628c..a8fcfc3af540d6d270532c9154aecd51a4662f6a 100644 --- a/make_cleanFeeders.R +++ b/make_cleanFeeders.R @@ -3,7 +3,9 @@ # Set up ---- startTime <- proc.time() -update <- "yes" # edit this in any way (at all) to get drake to re-load the data +updateData <- "yes" # edit this in any way (at all) to get drake to re-load the data +updateReport <- "yes" # edit this to force re-render of .Rmd + library(drake) # use r_make to run the plan inside a clean R session so nothing gets contaminated drake::r_make(source = "_drakeCleanFeeders.R") # where we keep the drake plan etc