diff --git a/Rmd/cleaningFeederData.Rmd b/Rmd/cleaningFeederData.Rmd index 8b4a8bc058532bce7fbd6ed093da4440d162fd21..0e3e22755e3abeaf866a305344e3e67e0ee0643d 100644 --- a/Rmd/cleaningFeederData.Rmd +++ b/Rmd/cleaningFeederData.Rmd @@ -98,6 +98,8 @@ feederDT <- uniqDataDT[!is.na(rDateTime)] # use dt with no duplicates origDataDT <- NULL # save memory ``` +There were `r tidyNum(nrow(origDataDT) - nrow(uniqDataDT))` duplicates - that's `r round(pc,2)` % of the observations loaded. + So we remove the duplicates... # Basic patterns diff --git a/_drakeCleanFeeders.R b/_drakeCleanFeeders.R index 7e52faf547d07f47ba5f887a3ac2ddffc59183b5..d914fad43611a6b5a84ddc855076e820725c0381 100644 --- a/_drakeCleanFeeders.R +++ b/_drakeCleanFeeders.R @@ -18,7 +18,7 @@ dataCleaning::loadLibraries(makeLibs) # Parameters ---- updateData <- "yep" # edit this in any way (at all) to get drake to re-load the data -updateReport <- "yep" # edit this in any way (at all) to get drake to re-load the data +updateReport <- "yes" # edit this in any way (at all) to get drake to re-load the data # Some data to play with: diff --git a/docs/cleaningFeederData_allData.html b/docs/cleaningFeederData_allData.html index f173181ffbd418c47ed9f47b91b76f938f0c8384..92aca205c594e41143da788420897e439d825ad7 100644 --- a/docs/cleaningFeederData_allData.html +++ b/docs/cleaningFeederData_allData.html @@ -181,7 +181,7 @@ summary { <h1 class="title toc-ignore">Testing electricity substation/feeder data</h1> <h3 class="subtitle">Outliers and missing data...</h3> <h4 class="author">Ben Anderson & Ellis Ridett</h4> -<h4 class="date">Last run at: 2020-07-09 00:43:38</h4> +<h4 class="date">Last run at: 2020-07-09 00:56:06</h4> </div> @@ -494,6 +494,7 @@ message("That's ", round(pc,2), "%") feederDT <- uniqDataDT[!is.na(rDateTime)] # use dt with no duplicates origDataDT <- NULL # save memory</code></pre> +<p>There were duplicates - that's 0.38 % of the observations loaded.</p> <p>So we remove the duplicates...</p> </div> </div> @@ -1499,7 +1500,7 @@ Fri </div> <div id="runtime" class="section level1"> <h1>Runtime</h1> -<p>Analysis completed in 187.13 seconds ( 3.12 minutes) using <a href="https://cran.r-project.org/package=knitr">knitr</a> in <a href="http://www.rstudio.com">RStudio</a> with R version 3.6.0 (2019-04-26) running on x86_64-redhat-linux-gnu.</p> +<p>Analysis completed in 196.02 seconds ( 3.27 minutes) using <a href="https://cran.r-project.org/package=knitr">knitr</a> in <a href="http://www.rstudio.com">RStudio</a> with R version 3.6.0 (2019-04-26) running on x86_64-redhat-linux-gnu.</p> </div> <div id="r-environment" class="section level1"> <h1>R environment</h1> @@ -1546,19 +1547,18 @@ Fri ## [1] storr_1.2.1 progress_1.2.2 tidyselect_1.1.0 xfun_0.15 ## [5] repr_1.1.0 purrr_0.3.4 colorspace_1.4-0 vctrs_0.3.1 ## [9] generics_0.0.2 viridisLite_0.3.0 htmltools_0.3.6 yaml_2.2.0 -## [13] base64enc_0.1-3 rlang_0.4.6 R.oo_1.22.0 pillar_1.4.4 -## [17] txtq_0.2.3 glue_1.4.1 withr_2.1.2 R.utils_2.7.0 -## [21] lifecycle_0.2.0 stringr_1.4.0 munsell_0.5.0 gtable_0.2.0 -## [25] rvest_0.3.5 R.methodsS3_1.7.1 codetools_0.2-16 evaluate_0.14 -## [29] labeling_0.3 knitr_1.29 parallel_3.6.0 fansi_0.4.0 -## [33] highr_0.7 Rcpp_1.0.1 readr_1.3.1 scales_1.0.0 -## [37] backports_1.1.3 filelock_1.0.2 webshot_0.5.2 jsonlite_1.6 -## [41] digest_0.6.25 stringi_1.2.4 dplyr_1.0.0 grid_3.6.0 -## [45] rprojroot_1.3-2 cli_2.0.2 tools_3.6.0 magrittr_1.5 -## [49] base64url_1.4 tibble_3.0.2 crayon_1.3.4 pkgconfig_2.0.2 -## [53] ellipsis_0.3.1 xml2_1.3.2 prettyunits_1.0.2 httr_1.4.1 -## [57] assertthat_0.2.0 rmarkdown_2.3 rstudioapi_0.11 R6_2.3.0 -## [61] igraph_1.2.2 compiler_3.6.0</code></pre> +## [13] base64enc_0.1-3 rlang_0.4.6 pillar_1.4.4 txtq_0.2.3 +## [17] glue_1.4.1 withr_2.1.2 lifecycle_0.2.0 stringr_1.4.0 +## [21] munsell_0.5.0 gtable_0.2.0 rvest_0.3.5 evaluate_0.14 +## [25] labeling_0.3 knitr_1.29 parallel_3.6.0 fansi_0.4.0 +## [29] highr_0.7 Rcpp_1.0.1 readr_1.3.1 scales_1.0.0 +## [33] backports_1.1.3 filelock_1.0.2 webshot_0.5.2 jsonlite_1.6 +## [37] digest_0.6.25 stringi_1.2.4 dplyr_1.0.0 grid_3.6.0 +## [41] rprojroot_1.3-2 cli_2.0.2 tools_3.6.0 magrittr_1.5 +## [45] base64url_1.4 tibble_3.0.2 crayon_1.3.4 pkgconfig_2.0.2 +## [49] ellipsis_0.3.1 xml2_1.3.2 prettyunits_1.0.2 httr_1.4.1 +## [53] assertthat_0.2.0 rmarkdown_2.3 rstudioapi_0.11 R6_2.3.0 +## [57] igraph_1.2.2 compiler_3.6.0</code></pre> </div> </div> <div id="the-raw-data-cleaning-code" class="section level1"> diff --git a/docs/cleaningFeederData_allData.pdf b/docs/cleaningFeederData_allData.pdf index 6ee0a8884100b83b83eebaa8909eef2d1bee4ade..6268d3469501440f1ee512c5b142717b1624b0ef 100644 Binary files a/docs/cleaningFeederData_allData.pdf and b/docs/cleaningFeederData_allData.pdf differ