diff --git a/Rmd/cleaningFeederData_allData.log b/Rmd/cleaningFeederData_allData.log deleted file mode 100644 index 318599028f02c106bb96cf5612c33b613d85db70..0000000000000000000000000000000000000000 --- a/Rmd/cleaningFeederData_allData.log +++ /dev/null @@ -1,540 +0,0 @@ -This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013) (format=pdflatex 2020.4.15) 9 JUL 2020 00:11 -entering extended mode - restricted \write18 enabled. - %&-line parsing enabled. -**/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData.te -x - -(/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData.tex -LaTeX2e <2011/06/27> -Babel <v3.8m> and hyphenation patterns for english, dumylang, nohyphenation, lo -aded. -(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls -Document Class: article 2007/10/19 v1.4h Standard LaTeX document class -(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo -File: size10.clo 2007/10/19 v1.4h Standard LaTeX file (size option) -) -\c@part=\count79 -\c@section=\count80 -\c@subsection=\count81 -\c@subsubsection=\count82 -\c@paragraph=\count83 -\c@subparagraph=\count84 -\c@figure=\count85 -\c@table=\count86 -\abovecaptionskip=\skip41 -\belowcaptionskip=\skip42 -\bibindent=\dimen102 -) (/usr/share/texlive/texmf-dist/tex/latex/lm/lmodern.sty -Package: lmodern 2009/10/30 v1.6 Latin Modern Fonts -LaTeX Font Info: Overwriting symbol font `operators' in version `normal' -(Font) OT1/cmr/m/n --> OT1/lmr/m/n on input line 22. -LaTeX Font Info: Overwriting symbol font `letters' in version `normal' -(Font) OML/cmm/m/it --> OML/lmm/m/it on input line 23. -LaTeX Font Info: Overwriting symbol font `symbols' in version `normal' -(Font) OMS/cmsy/m/n --> OMS/lmsy/m/n on input line 24. -LaTeX Font Info: Overwriting symbol font `largesymbols' in version `normal' -(Font) OMX/cmex/m/n --> OMX/lmex/m/n on input line 25. 
-LaTeX Font Info: Overwriting symbol font `operators' in version `bold' -(Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 26. -LaTeX Font Info: Overwriting symbol font `letters' in version `bold' -(Font) OML/cmm/b/it --> OML/lmm/b/it on input line 27. -LaTeX Font Info: Overwriting symbol font `symbols' in version `bold' -(Font) OMS/cmsy/b/n --> OMS/lmsy/b/n on input line 28. -LaTeX Font Info: Overwriting symbol font `largesymbols' in version `bold' -(Font) OMX/cmex/m/n --> OMX/lmex/m/n on input line 29. -LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal' -(Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 31. -LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal' -(Font) OT1/cmss/m/n --> OT1/lmss/m/n on input line 32. -LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal' -(Font) OT1/cmr/m/it --> OT1/lmr/m/it on input line 33. -LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal' -(Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 34. -LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `bold' -(Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 35. -LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold' -(Font) OT1/cmss/bx/n --> OT1/lmss/bx/n on input line 36. -LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold' -(Font) OT1/cmr/bx/it --> OT1/lmr/bx/it on input line 37. -LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `bold' -(Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 38. -) (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amssymb.sty -Package: amssymb 2013/01/14 v3.01 AMS font symbols -(/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amsfonts.sty -Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support -\@emptytoks=\toks14 -\symAMSa=\mathgroup4 -\symAMSb=\mathgroup5 -LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' -(Font) U/euf/m/n --> U/euf/b/n on input line 106. 
-)) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty -Package: amsmath 2013/01/14 v2.14 AMS math features -\@mathmargin=\skip43 -For additional information on amsmath, use the `?' option. -(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty -Package: amstext 2000/06/29 v2.01 -(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty -File: amsgen.sty 1999/11/30 v2.0 -\@emptytoks=\toks15 -\ex@=\dimen103 -)) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty -Package: amsbsy 1999/11/29 v1.2d -\pmbraise@=\dimen104 -) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty -Package: amsopn 1999/12/14 v2.01 operator names -) -\inf@bad=\count87 -LaTeX Info: Redefining \frac on input line 210. -\uproot@=\count88 -\leftroot@=\count89 -LaTeX Info: Redefining \overline on input line 306. -\classnum@=\count90 -\DOTSCASE@=\count91 -LaTeX Info: Redefining \ldots on input line 378. -LaTeX Info: Redefining \dots on input line 381. -LaTeX Info: Redefining \cdots on input line 466. -\Mathstrutbox@=\box26 -\strutbox@=\box27 -\big@size=\dimen105 -LaTeX Font Info: Redeclaring font encoding OML on input line 566. -LaTeX Font Info: Redeclaring font encoding OMS on input line 567. -\macc@depth=\count92 -\c@MaxMatrixCols=\count93 -\dotsspace@=\muskip10 -\c@parentequation=\count94 -\dspbrk@lvl=\count95 -\tag@help=\toks16 -\row@=\count96 -\column@=\count97 -\maxfields@=\count98 -\andhelp@=\toks17 -\eqnshift@=\dimen106 -\alignsep@=\dimen107 -\tagshift@=\dimen108 -\tagwidth@=\dimen109 -\totwidth@=\dimen110 -\lineht@=\dimen111 -\@envbody=\toks18 -\multlinegap=\skip44 -\multlinetaggap=\skip45 -\mathdisplay@stack=\toks19 -LaTeX Info: Redefining \[ on input line 2665. -LaTeX Info: Redefining \] on input line 2666. 
-) (/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty -Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional -) (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifluatex.sty -Package: ifluatex 2010/03/01 v1.3 Provides the ifluatex switch (HO) -Package ifluatex Info: LuaTeX not detected. -) (/usr/share/texlive/texmf-dist/tex/latex/base/fixltx2e.sty -Package: fixltx2e 2006/09/13 v1.1m fixes to LaTeX -LaTeX Info: Redefining \em on input line 420. -LaTeX Info: The control sequence `\[' is already robust on input line 471. -LaTeX Info: The control sequence `\]' is already robust on input line 472. -) (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty -Package: fontenc 2005/09/27 v1.99g Standard LaTeX package -(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def -File: t1enc.def 2005/09/27 v1.99g Standard LaTeX file -LaTeX Font Info: Redeclaring font encoding T1 on input line 43. -)) (/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty -Package: inputenc 2008/03/30 v1.1d Input encoding file -\inpenc@prehook=\toks20 -\inpenc@posthook=\toks21 -(/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def -File: utf8.def 2008/04/05 v1.1m UTF-8 support for inputenc -Now handling font encoding OML ... -... no UTF-8 mapping file for font encoding OML -Now handling font encoding T1 ... -... 
processing UTF-8 mapping file for font encoding T1 -(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu -File: t1enc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc - defining Unicode char U+00A1 (decimal 161) - defining Unicode char U+00A3 (decimal 163) - defining Unicode char U+00AB (decimal 171) - defining Unicode char U+00BB (decimal 187) - defining Unicode char U+00BF (decimal 191) - defining Unicode char U+00C0 (decimal 192) - defining Unicode char U+00C1 (decimal 193) - defining Unicode char U+00C2 (decimal 194) - defining Unicode char U+00C3 (decimal 195) - defining Unicode char U+00C4 (decimal 196) - defining Unicode char U+00C5 (decimal 197) - defining Unicode char U+00C6 (decimal 198) - defining Unicode char U+00C7 (decimal 199) - defining Unicode char U+00C8 (decimal 200) - defining Unicode char U+00C9 (decimal 201) - defining Unicode char U+00CA (decimal 202) - defining Unicode char U+00CB (decimal 203) - defining Unicode char U+00CC (decimal 204) - defining Unicode char U+00CD (decimal 205) - defining Unicode char U+00CE (decimal 206) - defining Unicode char U+00CF (decimal 207) - defining Unicode char U+00D0 (decimal 208) - defining Unicode char U+00D1 (decimal 209) - defining Unicode char U+00D2 (decimal 210) - defining Unicode char U+00D3 (decimal 211) - defining Unicode char U+00D4 (decimal 212) - defining Unicode char U+00D5 (decimal 213) - defining Unicode char U+00D6 (decimal 214) - defining Unicode char U+00D8 (decimal 216) - defining Unicode char U+00D9 (decimal 217) - defining Unicode char U+00DA (decimal 218) - defining Unicode char U+00DB (decimal 219) - defining Unicode char U+00DC (decimal 220) - defining Unicode char U+00DD (decimal 221) - defining Unicode char U+00DE (decimal 222) - defining Unicode char U+00DF (decimal 223) - defining Unicode char U+00E0 (decimal 224) - defining Unicode char U+00E1 (decimal 225) - defining Unicode char U+00E2 (decimal 226) - defining Unicode char U+00E3 (decimal 227) - defining Unicode char U+00E4 
(decimal 228) - defining Unicode char U+00E5 (decimal 229) - defining Unicode char U+00E6 (decimal 230) - defining Unicode char U+00E7 (decimal 231) - defining Unicode char U+00E8 (decimal 232) - defining Unicode char U+00E9 (decimal 233) - defining Unicode char U+00EA (decimal 234) - defining Unicode char U+00EB (decimal 235) - defining Unicode char U+00EC (decimal 236) - defining Unicode char U+00ED (decimal 237) - defining Unicode char U+00EE (decimal 238) - defining Unicode char U+00EF (decimal 239) - defining Unicode char U+00F0 (decimal 240) - defining Unicode char U+00F1 (decimal 241) - defining Unicode char U+00F2 (decimal 242) - defining Unicode char U+00F3 (decimal 243) - defining Unicode char U+00F4 (decimal 244) - defining Unicode char U+00F5 (decimal 245) - defining Unicode char U+00F6 (decimal 246) - defining Unicode char U+00F8 (decimal 248) - defining Unicode char U+00F9 (decimal 249) - defining Unicode char U+00FA (decimal 250) - defining Unicode char U+00FB (decimal 251) - defining Unicode char U+00FC (decimal 252) - defining Unicode char U+00FD (decimal 253) - defining Unicode char U+00FE (decimal 254) - defining Unicode char U+00FF (decimal 255) - defining Unicode char U+0102 (decimal 258) - defining Unicode char U+0103 (decimal 259) - defining Unicode char U+0104 (decimal 260) - defining Unicode char U+0105 (decimal 261) - defining Unicode char U+0106 (decimal 262) - defining Unicode char U+0107 (decimal 263) - defining Unicode char U+010C (decimal 268) - defining Unicode char U+010D (decimal 269) - defining Unicode char U+010E (decimal 270) - defining Unicode char U+010F (decimal 271) - defining Unicode char U+0110 (decimal 272) - defining Unicode char U+0111 (decimal 273) - defining Unicode char U+0118 (decimal 280) - defining Unicode char U+0119 (decimal 281) - defining Unicode char U+011A (decimal 282) - defining Unicode char U+011B (decimal 283) - defining Unicode char U+011E (decimal 286) - defining Unicode char U+011F (decimal 287) - 
defining Unicode char U+0130 (decimal 304) - defining Unicode char U+0131 (decimal 305) - defining Unicode char U+0132 (decimal 306) - defining Unicode char U+0133 (decimal 307) - defining Unicode char U+0139 (decimal 313) - defining Unicode char U+013A (decimal 314) - defining Unicode char U+013D (decimal 317) - defining Unicode char U+013E (decimal 318) - defining Unicode char U+0141 (decimal 321) - defining Unicode char U+0142 (decimal 322) - defining Unicode char U+0143 (decimal 323) - defining Unicode char U+0144 (decimal 324) - defining Unicode char U+0147 (decimal 327) - defining Unicode char U+0148 (decimal 328) - defining Unicode char U+014A (decimal 330) - defining Unicode char U+014B (decimal 331) - defining Unicode char U+0150 (decimal 336) - defining Unicode char U+0151 (decimal 337) - defining Unicode char U+0152 (decimal 338) - defining Unicode char U+0153 (decimal 339) - defining Unicode char U+0154 (decimal 340) - defining Unicode char U+0155 (decimal 341) - defining Unicode char U+0158 (decimal 344) - defining Unicode char U+0159 (decimal 345) - defining Unicode char U+015A (decimal 346) - defining Unicode char U+015B (decimal 347) - defining Unicode char U+015E (decimal 350) - defining Unicode char U+015F (decimal 351) - defining Unicode char U+0160 (decimal 352) - defining Unicode char U+0161 (decimal 353) - defining Unicode char U+0162 (decimal 354) - defining Unicode char U+0163 (decimal 355) - defining Unicode char U+0164 (decimal 356) - defining Unicode char U+0165 (decimal 357) - defining Unicode char U+016E (decimal 366) - defining Unicode char U+016F (decimal 367) - defining Unicode char U+0170 (decimal 368) - defining Unicode char U+0171 (decimal 369) - defining Unicode char U+0178 (decimal 376) - defining Unicode char U+0179 (decimal 377) - defining Unicode char U+017A (decimal 378) - defining Unicode char U+017B (decimal 379) - defining Unicode char U+017C (decimal 380) - defining Unicode char U+017D (decimal 381) - defining Unicode 
char U+017E (decimal 382) - defining Unicode char U+200C (decimal 8204) - defining Unicode char U+2013 (decimal 8211) - defining Unicode char U+2014 (decimal 8212) - defining Unicode char U+2018 (decimal 8216) - defining Unicode char U+2019 (decimal 8217) - defining Unicode char U+201A (decimal 8218) - defining Unicode char U+201C (decimal 8220) - defining Unicode char U+201D (decimal 8221) - defining Unicode char U+201E (decimal 8222) - defining Unicode char U+2030 (decimal 8240) - defining Unicode char U+2031 (decimal 8241) - defining Unicode char U+2039 (decimal 8249) - defining Unicode char U+203A (decimal 8250) - defining Unicode char U+2423 (decimal 9251) -) -Now handling font encoding OT1 ... -... processing UTF-8 mapping file for font encoding OT1 -(/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu -File: ot1enc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc - defining Unicode char U+00A1 (decimal 161) - defining Unicode char U+00A3 (decimal 163) - defining Unicode char U+00B8 (decimal 184) - defining Unicode char U+00BF (decimal 191) - defining Unicode char U+00C5 (decimal 197) - defining Unicode char U+00C6 (decimal 198) - defining Unicode char U+00D8 (decimal 216) - defining Unicode char U+00DF (decimal 223) - defining Unicode char U+00E6 (decimal 230) - defining Unicode char U+00EC (decimal 236) - defining Unicode char U+00ED (decimal 237) - defining Unicode char U+00EE (decimal 238) - defining Unicode char U+00EF (decimal 239) - defining Unicode char U+00F8 (decimal 248) - defining Unicode char U+0131 (decimal 305) - defining Unicode char U+0141 (decimal 321) - defining Unicode char U+0142 (decimal 322) - defining Unicode char U+0152 (decimal 338) - defining Unicode char U+0153 (decimal 339) - defining Unicode char U+2013 (decimal 8211) - defining Unicode char U+2014 (decimal 8212) - defining Unicode char U+2018 (decimal 8216) - defining Unicode char U+2019 (decimal 8217) - defining Unicode char U+201C (decimal 8220) - defining Unicode char 
U+201D (decimal 8221) -) -Now handling font encoding OMS ... -... processing UTF-8 mapping file for font encoding OMS -(/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu -File: omsenc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc - defining Unicode char U+00A7 (decimal 167) - defining Unicode char U+00B6 (decimal 182) - defining Unicode char U+00B7 (decimal 183) - defining Unicode char U+2020 (decimal 8224) - defining Unicode char U+2021 (decimal 8225) - defining Unicode char U+2022 (decimal 8226) -) -Now handling font encoding OMX ... -... no UTF-8 mapping file for font encoding OMX -Now handling font encoding U ... -... no UTF-8 mapping file for font encoding U - defining Unicode char U+00A9 (decimal 169) - defining Unicode char U+00AA (decimal 170) - defining Unicode char U+00AE (decimal 174) - defining Unicode char U+00BA (decimal 186) - defining Unicode char U+02C6 (decimal 710) - defining Unicode char U+02DC (decimal 732) - defining Unicode char U+200C (decimal 8204) - defining Unicode char U+2026 (decimal 8230) - defining Unicode char U+2122 (decimal 8482) - defining Unicode char U+2423 (decimal 9251) -)) (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.sty -Package: microtype 2013/03/13 v2.5 Micro-typographical refinements (RS) -(/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty -Package: keyval 1999/03/16 v1.13 key=value parser (DPC) -\KV@toks@=\toks22 -) -\MT@toks=\toks23 -\MT@count=\count99 -LaTeX Info: Redefining \textls on input line 771. -\MT@outer@kern=\dimen112 -LaTeX Info: Redefining \textmicrotypecontext on input line 1290. -\MT@listname@count=\count100 -(/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype-pdftex.def -File: microtype-pdftex.def 2013/03/13 v2.5 Definitions specific to pdftex (RS) -LaTeX Info: Redefining \lsstyle on input line 889. -LaTeX Info: Redefining \lslig on input line 889. -\MT@outer@space=\skip46 -) -Package microtype Info: Loading configuration file microtype.cfg. 
-(/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.cfg -File: microtype.cfg 2013/03/13 v2.5 microtype main configuration file (RS) -)) (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hyperref.sty -Package: hyperref 2012/11/06 v6.83m Hypertext links for LaTeX -(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-hyperref.sty -Package: hobsub-hyperref 2012/05/28 v1.13 Bundle oberdiek, subset hyperref (HO) - -(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-generic.sty -Package: hobsub-generic 2012/05/28 v1.13 Bundle oberdiek, subset generic (HO) -Package: hobsub 2012/05/28 v1.13 Construct package bundles (HO) -Package: infwarerr 2010/04/08 v1.3 Providing info/warning/error messages (HO) -Package: ltxcmds 2011/11/09 v1.22 LaTeX kernel commands for general use (HO) -Package hobsub Info: Skipping package `ifluatex' (already loaded). -Package: ifvtex 2010/03/01 v1.5 Detect VTeX and its facilities (HO) -Package ifvtex Info: VTeX not detected. -Package: intcalc 2007/09/27 v1.1 Expandable calculations with integers (HO) -Package: ifpdf 2011/01/30 v2.3 Provides the ifpdf switch (HO) -Package ifpdf Info: pdfTeX in PDF mode is detected. -Package: etexcmds 2011/02/16 v1.5 Avoid name clashes with e-TeX commands (HO) -Package etexcmds Info: Could not find \expanded. -(etexcmds) That can mean that you are not using pdfTeX 1.50 or -(etexcmds) that some package has redefined \expanded. -(etexcmds) In the latter case, load this package earlier. -Package: kvsetkeys 2012/04/25 v1.16 Key value parser (HO) -Package: kvdefinekeys 2011/04/07 v1.3 Define keys (HO) -Package: pdftexcmds 2011/11/29 v0.20 Utility functions of pdfTeX for LuaTeX (HO -) -Package pdftexcmds Info: LuaTeX not detected. -Package pdftexcmds Info: \pdf@primitive is available. -Package pdftexcmds Info: \pdf@ifprimitive is available. -Package pdftexcmds Info: \pdfdraftmode found. 
-Package: pdfescape 2011/11/25 v1.13 Implements pdfTeX's escape features (HO) -Package: bigintcalc 2012/04/08 v1.3 Expandable calculations on big integers (HO -) -Package: bitset 2011/01/30 v1.1 Handle bit-vector datatype (HO) -Package: uniquecounter 2011/01/30 v1.2 Provide unlimited unique counter (HO) -) -Package hobsub Info: Skipping package `hobsub' (already loaded). -Package: letltxmacro 2010/09/02 v1.4 Let assignment for LaTeX macros (HO) -Package: hopatch 2012/05/28 v1.2 Wrapper for package hooks (HO) -Package: xcolor-patch 2011/01/30 xcolor patch -Package: atveryend 2011/06/30 v1.8 Hooks at the very end of document (HO) -Package atveryend Info: \enddocument detected (standard20110627). -Package: atbegshi 2011/10/05 v1.16 At begin shipout hook (HO) -Package: refcount 2011/10/16 v3.4 Data extraction from label references (HO) -Package: hycolor 2011/01/30 v1.7 Color options for hyperref/bookmark (HO) -) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/auxhook.sty -Package: auxhook 2011/03/04 v1.3 Hooks for auxiliary files (HO) -) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/kvoptions.sty -Package: kvoptions 2011/06/30 v3.11 Key value format for package options (HO) -) -\@linkdim=\dimen113 -\Hy@linkcounter=\count101 -\Hy@pagecounter=\count102 -(/usr/share/texlive/texmf-dist/tex/latex/hyperref/pd1enc.def -File: pd1enc.def 2012/11/06 v6.83m Hyperref: PDFDocEncoding definition (HO) -Now handling font encoding PD1 ... -... no UTF-8 mapping file for font encoding PD1 -) -\Hy@SavedSpaceFactor=\count103 -(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/hyperref.cfg -File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive -) -Package hyperref Info: Option `unicode' set `true' on input line 4319. -(/usr/share/texlive/texmf-dist/tex/latex/hyperref/puenc.def -File: puenc.def 2012/11/06 v6.83m Hyperref: PDF Unicode definition (HO) -Now handling font encoding PU ... -... 
no UTF-8 mapping file for font encoding PU -) -Package hyperref Info: Hyper figures OFF on input line 4443. -Package hyperref Info: Link nesting OFF on input line 4448. -Package hyperref Info: Hyper index ON on input line 4451. -Package hyperref Info: Plain pages OFF on input line 4458. -Package hyperref Info: Backreferencing OFF on input line 4463. -Package hyperref Info: Implicit mode ON; LaTeX internals redefined. -Package hyperref Info: Bookmarks ON on input line 4688. -\c@Hy@tempcnt=\count104 -(/usr/share/texlive/texmf-dist/tex/latex/url/url.sty -\Urlmuskip=\muskip11 -Package: url 2006/04/12 ver 3.3 Verb mode for urls, etc. -) -LaTeX Info: Redefining \url on input line 5041. -\XeTeXLinkMargin=\dimen114 -\Fld@menulength=\count105 -\Field@Width=\dimen115 -\Fld@charsize=\dimen116 -Package hyperref Info: Hyper figures OFF on input line 6295. -Package hyperref Info: Link nesting OFF on input line 6300. -Package hyperref Info: Hyper index ON on input line 6303. -Package hyperref Info: backreferencing OFF on input line 6310. -Package hyperref Info: Link coloring OFF on input line 6315. -Package hyperref Info: Link coloring with OCG OFF on input line 6320. -Package hyperref Info: PDF/A mode OFF on input line 6325. -LaTeX Info: Redefining \ref on input line 6365. -LaTeX Info: Redefining \pageref on input line 6369. -\Hy@abspage=\count106 -\c@Item=\count107 -\c@Hfootnote=\count108 -) - -Package hyperref Message: Driver (autodetected): hpdftex. - -(/usr/share/texlive/texmf-dist/tex/latex/hyperref/hpdftex.def -File: hpdftex.def 2012/11/06 v6.83m Hyperref driver for pdfTeX -\Fld@listcount=\count109 -\c@bookmark@seq@number=\count110 -(/usr/share/texlive/texmf-dist/tex/latex/oberdiek/rerunfilecheck.sty -Package: rerunfilecheck 2011/04/15 v1.7 Rerun checks for auxiliary files (HO) -Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2 -82. -) -\Hy@SectionHShift=\skip47 -) -Package hyperref Info: Option `breaklinks' set `true' on input line 30. 
-(/usr/share/texlive/texmf-dist/tex/latex/geometry/geometry.sty -Package: geometry 2010/09/12 v5.6 Page Geometry -\Gm@cnth=\count111 -\Gm@cntv=\count112 -\c@Gm@tempcnt=\count113 -\Gm@bindingoffset=\dimen117 -\Gm@wd@mp=\dimen118 -\Gm@odd@mp=\dimen119 -\Gm@even@mp=\dimen120 -\Gm@layoutwidth=\dimen121 -\Gm@layoutheight=\dimen122 -\Gm@layouthoffset=\dimen123 -\Gm@layoutvoffset=\dimen124 -\Gm@dimlist=\toks24 -) (/usr/share/texlive/texmf-dist/tex/latex/graphics/color.sty -Package: color 2005/11/14 v1.0j Standard LaTeX Color (DPC) -(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/color.cfg -File: color.cfg 2007/01/18 v1.5 color configuration of teTeX/TeXLive -) -Package color Info: Driver file: pdftex.def on input line 130. -(/usr/share/texlive/texmf-dist/tex/latex/pdftex-def/pdftex.def -File: pdftex.def 2011/05/27 v0.06d Graphics/color for pdfTeX -\Gread@gobject=\count114 -)) (/usr/share/texlive/texmf-dist/tex/latex/fancyvrb/fancyvrb.sty -Package: fancyvrb 2008/02/07 - -Style option: `fancyvrb' v2.7a, with DG/SPQR fixes, and firstline=lastline fix -<2008/02/07> (tvz) -\FV@CodeLineNo=\count115 -\FV@InFile=\read1 -\FV@TabBox=\box28 -\c@FancyVerbLine=\count116 -\FV@StepNumber=\count117 -\FV@OutFile=\write3 -) - -! LaTeX Error: File `framed.sty' not found. - -Type X to quit or <RETURN> to proceed, -or enter new name. (Default extension: sty) - -Enter file name: -! Emergency stop. -<read *> - -l.40 \definecolor - {shadecolor}{RGB}{248,248,248}^^M -Here is how much of TeX's memory you used: - 10860 strings out of 495063 - 153982 string characters out of 3182201 - 252362 words of memory out of 3000000 - 14008 multiletter control sequences out of 15000+200000 - 4403 words of font info for 15 fonts, out of 3000000 for 9000 - 14 hyphenation exceptions out of 8191 - 31i,0n,35p,299b,272s stack positions out of 5000i,500n,10000p,200000b,50000s - -! ==> Fatal error occurred, no output PDF file produced! 
diff --git a/docs/cleaningFeederData_allData.pdf b/docs/cleaningFeederData_allData.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6ee0a8884100b83b83eebaa8909eef2d1bee4ade Binary files /dev/null and b/docs/cleaningFeederData_allData.pdf differ diff --git a/docs/cleaningFeederData_allData.tex b/docs/cleaningFeederData_allData.tex deleted file mode 100644 index 8528c853b0f3d1118c085a7e51b0fa13f91dc66d..0000000000000000000000000000000000000000 --- a/docs/cleaningFeederData_allData.tex +++ /dev/null @@ -1,1292 +0,0 @@ -\documentclass[]{article} -\usepackage{lmodern} -\usepackage{amssymb,amsmath} -\usepackage{ifxetex,ifluatex} -\usepackage{fixltx2e} % provides \textsubscript -\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex - \usepackage[T1]{fontenc} - \usepackage[utf8]{inputenc} -\else % if luatex or xelatex - \ifxetex - \usepackage{mathspec} - \else - \usepackage{fontspec} - \fi - \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase} -\fi -% use upquote if available, for straight quotes in verbatim environments -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} -% use microtype if available -\IfFileExists{microtype.sty}{% -\usepackage[]{microtype} -\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts -}{} -\PassOptionsToPackage{hyphens}{url} % url is loaded by hyperref -\usepackage[unicode=true]{hyperref} -\hypersetup{ - pdftitle={Testing electricity substation/feeder data}, - pdfauthor={Ben Anderson \& Ellis Ridett}, - pdfborder={0 0 0}, - breaklinks=true} -\urlstyle{same} % don't use monospace font for urls -\usepackage[margin=1in]{geometry} -\usepackage{color} -\usepackage{fancyvrb} -\newcommand{\VerbBar}{|} -\newcommand{\VERB}{\Verb[commandchars=\\\{\}]} -\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} -% Add ',fontsize=\small' for more characters per line -\usepackage{framed} -\definecolor{shadecolor}{RGB}{248,248,248} -\newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}} 
-\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}} -\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}} -\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}} -\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}} -\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}} -\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}} -\newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}} -\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}} -\newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}} -\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}} -\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}} -\newcommand{\ImportTok}[1]{#1} -\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}} -\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}} -\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}} -\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}} -\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}} -\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}} -\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}} -\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}} -\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.81,0.36,0.00}{\textbf{#1}}} -\newcommand{\BuiltInTok}[1]{#1} -\newcommand{\ExtensionTok}[1]{#1} -\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}} -\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}} -\newcommand{\RegionMarkerTok}[1]{#1} -\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}} -\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}} -\newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}} 
-\newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}} -\newcommand{\NormalTok}[1]{#1} -\usepackage{graphicx,grffile} -\makeatletter -\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} -\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} -\makeatother -% Scale images if necessary, so that they will not overflow the page -% margins by default, and it is still possible to overwrite the defaults -% using explicit options in \includegraphics[width, height, ...]{} -\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} -\IfFileExists{parskip.sty}{% -\usepackage{parskip} -}{% else -\setlength{\parindent}{0pt} -\setlength{\parskip}{6pt plus 2pt minus 1pt} -} -\setlength{\emergencystretch}{3em} % prevent overfull lines -\providecommand{\tightlist}{% - \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} -\setcounter{secnumdepth}{0} -% Redefines (sub)paragraphs to behave more like sections -\ifx\paragraph\undefined\else -\let\oldparagraph\paragraph -\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}} -\fi -\ifx\subparagraph\undefined\else -\let\oldsubparagraph\subparagraph -\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}} -\fi - -% set default figure placement to htbp -\makeatletter -\def\fps@figure{htbp} -\makeatother - -\usepackage{etoolbox} -\makeatletter -\providecommand{\subtitle}[1]{% add subtitle to \maketitle - \apptocmd{\@title}{\par {\large #1 \par}}{}{} -} -\makeatother -\usepackage{booktabs} -\usepackage{longtable} -\usepackage{array} -\usepackage{multirow} -\usepackage{wrapfig} -\usepackage{float} -\usepackage{colortbl} -\usepackage{pdflscape} -\usepackage{tabu} -\usepackage{threeparttable} -\usepackage{threeparttablex} -\usepackage[normalem]{ulem} -\usepackage{makecell} -\usepackage{xcolor} - -\title{Testing electricity substation/feeder data} -\providecommand{\subtitle}[1]{} -\subtitle{Outliers and missing data\ldots{}} -\author{Ben Anderson \& Ellis Ridett} 
-\date{Last run at: 2020-07-09 00:07:52} - -\begin{document} -\maketitle - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{# Knitr setup ----} -\NormalTok{knitr}\OperatorTok{::}\NormalTok{opts_chunk}\OperatorTok{$}\KeywordTok{set}\NormalTok{(}\DataTypeTok{echo =} \OtherTok{TRUE}\NormalTok{)} -\NormalTok{knitr}\OperatorTok{::}\NormalTok{opts_chunk}\OperatorTok{$}\KeywordTok{set}\NormalTok{(}\DataTypeTok{warning =} \OtherTok{FALSE}\NormalTok{) }\CommentTok{# for final tidy run} -\NormalTok{knitr}\OperatorTok{::}\NormalTok{opts_chunk}\OperatorTok{$}\KeywordTok{set}\NormalTok{(}\DataTypeTok{message =} \OtherTok{FALSE}\NormalTok{) }\CommentTok{# for final tidy run} - -\CommentTok{# Set start time ----} -\NormalTok{startTime <-}\StringTok{ }\KeywordTok{proc.time}\NormalTok{()} - -\CommentTok{# Libraries ----} -\NormalTok{rmdLibs <-}\StringTok{ }\KeywordTok{c}\NormalTok{(}\StringTok{"kableExtra"} \CommentTok{# tables} -\NormalTok{)} -\CommentTok{# load them} -\NormalTok{dataCleaning}\OperatorTok{::}\KeywordTok{loadLibraries}\NormalTok{(rmdLibs)} -\end{Highlighting} -\end{Shaded} - -\begin{verbatim} -## Loading required package: kableExtra -\end{verbatim} - -\begin{verbatim} -## kableExtra -## TRUE -\end{verbatim} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{# Parameters ----} -\CommentTok{#dFile <- "~/Dropbox/Ben_IOW_SS.csv" # edit for your set up} - - -\CommentTok{# Functions ----} -\CommentTok{# put more general ones that could be useful to everyone in /R so they are built into the package.} - -\CommentTok{# put functions relevant to this analysis here} -\end{Highlighting} -\end{Shaded} - -\section{Intro}\label{intro} - -We have some electricity substation feeder data that has been cleaned to -give mean kW per 15 minutes. - -There seem to be some NA kW values and a lot of missing time stamps. We -want to select the `best' (i.e most complete) days within a -day-of-the-week/season/year sampling frame. 
If we can't do that we may -have to resort to seasonal mean kW profiles by hour \& day of the -week\ldots{} - -Code used to generate this report: -\url{https://git.soton.ac.uk/ba1e12/spatialec/-/blob/master/isleOfWight/cleaningFeederData.Rmd} - -\section{Data prep}\label{data-prep} - -\subsection{Load data}\label{load-data} - -Loaded data from -/mnt/SERG\_data/Ellis\_IOW/Cleaned\_SS\_Amps/amps\_all\_substations.csv.gz\ldots{} -(using drake) - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{origDataDT <-}\StringTok{ }\NormalTok{drake}\OperatorTok{::}\KeywordTok{readd}\NormalTok{(origData) }\CommentTok{# readd the drake object} - -\NormalTok{uniqDataDT <-}\StringTok{ }\NormalTok{drake}\OperatorTok{::}\KeywordTok{readd}\NormalTok{(uniqData) }\CommentTok{# readd the drake object} - -\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(}\KeywordTok{head}\NormalTok{(origDataDT), }\DataTypeTok{digits =} \DecValTok{2}\NormalTok{,} - \DataTypeTok{caption =} \StringTok{"Counts per feeder (long table)"}\NormalTok{) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} -\end{Highlighting} -\end{Shaded} - -\begin{table} - -\caption{\label{tab:loadData}Counts per feeder (long table)} -\centering -\begin{tabular}[t]{l|l|l|l|l|l|r|l|r|l|l} -\hline -Time & region & sub\_region & rDateTime & rTime & rDate & rYear & rDoW & kW & feeder\_ID & season\\ -\hline -2003-01-13T10:30:00Z & ARRN & ARRN & 2003-01-13 10:30:00 & 10:30:00 & 2003-01-13 & 2003 & Mon & 2.00 & ARRN\_ARRN & Winter\\ -\hline -2003-01-13T10:45:00Z & ARRN & ARRN & 2003-01-13 10:45:00 & 10:45:00 & 2003-01-13 & 2003 & Mon & 18.25 & ARRN\_ARRN & Winter\\ -\hline -2003-01-13T11:15:00Z & ARRN & ARRN & 2003-01-13 11:15:00 & 11:15:00 & 2003-01-13 & 2003 & Mon & 0.67 & ARRN\_ARRN & Winter\\ -\hline -2003-01-13T11:30:00Z & ARRN & ARRN & 2003-01-13 11:30:00 & 11:30:00 & 2003-01-13 & 2003 & Mon & 28.50 & ARRN\_ARRN & Winter\\ -\hline -2003-01-13T11:45:00Z & ARRN & ARRN & 2003-01-13 11:45:00 & 
11:45:00 & 2003-01-13 & 2003 & Mon & 19.56 & ARRN\_ARRN & Winter\\ -\hline -2003-01-13T12:15:00Z & ARRN & ARRN & 2003-01-13 12:15:00 & 12:15:00 & 2003-01-13 & 2003 & Mon & 12.80 & ARRN\_ARRN & Winter\\ -\hline -\end{tabular} -\end{table} - -Check data prep worked OK. - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{# check} -\NormalTok{t <-}\StringTok{ }\NormalTok{origDataDT[, .(}\DataTypeTok{nObs =}\NormalTok{ .N,} - \DataTypeTok{firstDate =} \KeywordTok{min}\NormalTok{(rDateTime, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{),} - \DataTypeTok{lastDate =} \KeywordTok{max}\NormalTok{(rDateTime, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{),} - \DataTypeTok{meankW =} \KeywordTok{mean}\NormalTok{(kW, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{)} -\NormalTok{), keyby =}\StringTok{ }\NormalTok{.(region, feeder_ID)]} - -\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(t, }\DataTypeTok{digits =} \DecValTok{2}\NormalTok{,} - \DataTypeTok{caption =} \StringTok{"Counts per feeder (long table)"}\NormalTok{) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} -\end{Highlighting} -\end{Shaded} - -\begin{table} - -\caption{\label{tab:dataPrep}Counts per feeder (long table)} -\centering -\begin{tabular}[t]{l|l|r|l|l|r} -\hline -region & feeder\_ID & nObs & firstDate & lastDate & meankW\\ -\hline -ARRN & ARRN\_ARRN & 94909 & 2003-01-13 10:30:00 & 2017-10-25 22:15:00 & 151.74\\ -\hline -BINS & BINS\_C1T0 & 218480 & 2001-09-21 07:30:00 & 2017-10-13 23:45:00 & 94.70\\ -\hline -BINS & BINS\_C2T0 & 208447 & 2001-09-21 07:30:00 & 2017-10-13 23:45:00 & 93.91\\ -\hline -BINS & BINS\_E1L5 & 414980 & 2001-09-21 07:30:00 & 2017-10-13 23:15:00 & 94.31\\ -\hline -BINS & BINS\_E2L5 & 115260 & 2001-10-10 12:00:00 & 2017-10-04 20:45:00 & 20.58\\ -\hline -BINS & BINS\_E3L5 & 337064 & 2001-10-10 12:00:00 & 2017-10-14 23:30:00 & 59.67\\ -\hline -FFPV & FFPV\_FFPV & 32278 & 2014-09-25 09:15:00 & 2017-10-11 16:15:00 & 36.14\\ -\hline -FRES 
& FRES\_E1L5 & 452480 & 2001-10-10 12:00:00 & 2017-10-13 23:45:00 & 53.08\\ -\hline -FRES & FRES\_E1T0 & 188186 & 2001-09-11 15:15:00 & 2017-09-01 19:00:00 & 128.98\\ -\hline -FRES & FRES\_E2L5 & 178744 & 2001-10-10 12:00:00 & 2017-10-13 23:30:00 & 25.64\\ -\hline -FRES & FRES\_E2T0 & 164910 & 2001-09-11 15:15:00 & 2017-10-12 23:45:00 & 122.44\\ -\hline -FRES & FRES\_E3L5 & 463006 & 2001-10-10 12:00:00 & 2017-10-13 23:00:00 & 50.65\\ -\hline -FRES & FRES\_E4L5 & 15752 & 2010-07-30 17:00:00 & 2017-09-18 19:45:00 & 60.89\\ -\hline -FRES & FRES\_E6L5 & 317352 & 2001-09-11 15:15:00 & 2017-10-13 23:00:00 & 85.32\\ -\hline -NEWP & NEWP\_E11L5 & 367422 & 2005-01-20 10:00:00 & 2017-09-28 23:15:00 & 72.32\\ -\hline -NEWP & NEWP\_E13L5 & 252979 & 2010-01-01 00:15:00 & 2017-09-28 23:45:00 & 126.20\\ -\hline -NEWP & NEWP\_E15L5 & 295094 & 2008-01-07 12:00:00 & 2017-10-10 23:45:00 & 76.95\\ -\hline -NEWP & NEWP\_E17L5 & 63422 & 2011-03-10 12:45:00 & 2017-10-11 23:30:00 & 11.44\\ -\hline -NEWP & NEWP\_E19L5 & 126299 & 2011-03-14 09:45:00 & 2017-10-11 23:45:00 & 18.38\\ -\hline -NEWP & NEWP\_E1L5 & 318151 & 2001-10-10 12:15:00 & 2017-09-26 23:45:00 & 45.66\\ -\hline -NEWP & NEWP\_E1T0 & 101494 & 2001-09-11 15:30:00 & 2017-09-18 19:45:00 & 475.07\\ -\hline -NEWP & NEWP\_E2L5 & 67835 & 2001-09-11 15:30:00 & 2017-09-26 22:45:00 & 58.44\\ -\hline -NEWP & NEWP\_E2T0 & 399812 & 2001-10-10 12:15:00 & 2017-09-27 12:00:00 & 426.55\\ -\hline -NEWP & NEWP\_E3L5 & 480643 & 2001-10-10 12:15:00 & 2017-09-26 23:45:00 & 73.64\\ -\hline -NEWP & NEWP\_E3T0 & 246265 & 2005-08-03 11:15:00 & 2017-09-26 23:45:00 & 383.05\\ -\hline -NEWP & NEWP\_E4L5 & 191514 & 2001-09-11 15:15:00 & 2020-12-31 07:15:00 & 105.57\\ -\hline -NEWP & NEWP\_E5L5 & 448392 & 2001-09-11 15:15:00 & 2017-09-27 23:45:00 & 42.46\\ -\hline -NEWP & NEWP\_E6L5 & 434217 & 2001-09-11 15:30:00 & 2017-09-27 23:45:00 & 69.91\\ -\hline -NEWP & NEWP\_E7L5 & 306799 & 2001-10-10 12:15:00 & 2017-09-27 23:15:00 & 71.96\\ -\hline -NEWP & 
NEWP\_E8L5 & 537871 & 2001-10-10 12:15:00 & 2017-09-27 23:30:00 & 139.40\\ -\hline -NEWP & NEWP\_E9L5 & 363063 & 2002-12-19 22:30:00 & 2017-09-28 23:45:00 & 101.30\\ -\hline -RYDE & RYDE\_E1L5 & 356616 & 2001-09-21 09:30:00 & 2017-10-11 23:45:00 & 70.48\\ -\hline -RYDE & RYDE\_E1T0 \&E1S0 & 251062 & 2001-10-10 12:15:00 & 2017-10-11 23:30:00 & 336.55\\ -\hline -RYDE & RYDE\_E2L5 & 297293 & 2001-09-21 09:30:00 & 2017-10-11 23:45:00 & 71.14\\ -\hline -RYDE & RYDE\_E2T0 & 238332 & 2001-10-10 12:15:00 & 2017-10-11 23:45:00 & 351.26\\ -\hline -RYDE & RYDE\_E3L5 & 304293 & 2001-09-21 09:30:00 & 2017-10-11 23:45:00 & 85.22\\ -\hline -RYDE & RYDE\_E4L5 & 519366 & 2001-12-20 15:30:00 & 2017-10-12 23:45:00 & 70.23\\ -\hline -RYDE & RYDE\_E5L5 & 362368 & 2001-09-21 09:30:00 & 2017-10-12 23:15:00 & 82.05\\ -\hline -RYDE & RYDE\_E6L5 & 442859 & 2001-09-21 09:30:00 & 2017-10-12 23:45:00 & 96.24\\ -\hline -RYDE & RYDE\_E7L5 & 324195 & 2001-09-21 09:30:00 & 2017-10-12 22:45:00 & 69.86\\ -\hline -RYDE & RYDE\_E8L5 & 275373 & 2001-10-10 12:15:00 & 2017-10-12 23:15:00 & 57.04\\ -\hline -RYDE & RYDE\_E9L5 & 267617 & 2001-09-25 17:00:00 & 2017-10-12 23:30:00 & 59.20\\ -\hline -SADO & SADO\_E1L5 & 212775 & 2001-09-21 13:30:00 & 2017-10-25 23:15:00 & 50.98\\ -\hline -SADO & SADO\_E1T0 & 421960 & 2001-09-21 13:30:00 & 2017-10-25 23:45:00 & 230.66\\ -\hline -SADO & SADO\_E2L5 & 178715 & 2001-09-21 13:30:00 & 2017-10-25 23:15:00 & 39.74\\ -\hline -SADO & SADO\_E2T0 & 412191 & 2001-10-10 12:15:00 & 2017-10-25 23:30:00 & 173.51\\ -\hline -SADO & SADO\_E3L5 & 272831 & 2001-09-21 13:30:00 & 2017-10-25 23:15:00 & 64.61\\ -\hline -SADO & SADO\_E4L5 & 479020 & 2001-09-21 13:30:00 & 2017-10-25 23:45:00 & 58.38\\ -\hline -SADO & SADO\_E5L5 & 343918 & 2001-09-21 13:30:00 & 2017-10-25 23:45:00 & 82.67\\ -\hline -SADO & SADO\_E6L5 & 239227 & 2001-09-21 13:30:00 & 2017-10-25 23:30:00 & 56.34\\ -\hline -SADO & SADO\_E8L5 & 282455 & 2004-08-16 17:45:00 & 2017-10-25 23:30:00 & 89.57\\ -\hline -SHAL & 
SHAL\_C3L5 & 163204 & 2001-10-10 12:45:00 & 2017-10-15 23:15:00 & 38.22\\ -\hline -SHAL & SHAL\_C4L5 & 187940 & 2001-09-11 15:30:00 & 2017-10-15 23:45:00 & 38.77\\ -\hline -SHAL & SHAL\_C5L5 & 29417 & 2015-12-03 15:00:00 & 2017-10-15 23:45:00 & 36.35\\ -\hline -SHAL & SHAL\_E1L5 & 465913 & 2001-10-10 12:15:00 & 2017-10-14 23:30:00 & 70.65\\ -\hline -SHAL & SHAL\_E1T0 & 181132 & 2001-10-10 12:15:00 & 2017-10-14 23:15:00 & 101.23\\ -\hline -SHAL & SHAL\_E2L5 & 290286 & 2001-10-10 12:15:00 & 2017-10-15 23:00:00 & 47.09\\ -\hline -SHAL & SHAL\_E2T0 & 174129 & 2001-10-10 12:30:00 & 2017-10-14 22:45:00 & 107.44\\ -\hline -SHAL & SHAL\_E3L5 & 258805 & 2010-03-11 07:00:00 & 2017-10-15 23:45:00 & 33.26\\ -\hline -SHAL & SHAL\_E4L5 & 322135 & 2001-09-11 15:30:00 & 2017-10-16 12:30:00 & 54.03\\ -\hline -SHAN & SHAN\_E1L5 & 288894 & 2001-09-21 14:15:00 & 2017-10-24 23:15:00 & 63.52\\ -\hline -SHAN & SHAN\_E1T0 & 330691 & 2001-10-10 12:15:00 & 2017-10-24 23:45:00 & 226.58\\ -\hline -SHAN & SHAN\_E2L5 & 321760 & 2001-09-21 14:15:00 & 2017-10-25 23:15:00 & 72.63\\ -\hline -SHAN & SHAN\_E2T0 & 315053 & 2001-10-10 12:15:00 & 2017-10-24 23:45:00 & 186.69\\ -\hline -SHAN & SHAN\_E3L5 & 105606 & 2001-09-21 14:15:00 & 2017-10-25 23:15:00 & 26.30\\ -\hline -SHAN & SHAN\_E4L5 & 216626 & 2001-09-21 14:15:00 & 2017-10-25 23:30:00 & 33.63\\ -\hline -SHAN & SHAN\_E5L5 & 254742 & 2001-09-21 14:15:00 & 2017-10-25 23:15:00 & 48.50\\ -\hline -SHAN & SHAN\_E6L5 & 363107 & 2001-09-21 14:15:00 & 2017-10-25 23:15:00 & 68.69\\ -\hline -SHAN & SHAN\_E7L5 & 384165 & 2001-09-21 14:15:00 & 2017-10-25 23:15:00 & 66.12\\ -\hline -SHAN & SHAN\_E8L5 & 146605 & 2002-02-05 17:30:00 & 2017-10-25 23:15:00 & 25.28\\ -\hline -VENT & VENT\_E1L5 & 203617 & 2001-09-11 15:45:00 & 2017-10-15 23:15:00 & 33.24\\ -\hline -VENT & VENT\_E1T0 & 240745 & 2001-09-11 15:30:00 & 2017-10-15 23:15:00 & 191.42\\ -\hline -VENT & VENT\_E2L5 & 402307 & 2001-09-27 14:00:00 & 2017-10-16 23:45:00 & 46.68\\ -\hline -VENT & VENT\_E2T0 & 
208020 & 2001-09-11 15:45:00 & 2017-10-15 23:30:00 & 115.47\\ -\hline -VENT & VENT\_E3L5 & 493337 & 2001-09-11 15:45:00 & 2017-10-16 23:45:00 & 83.59\\ -\hline -VENT & VENT\_E4L5 & 387037 & 2001-09-11 15:45:00 & 2017-10-16 23:30:00 & 40.86\\ -\hline -VENT & VENT\_E5L5 & 481677 & 2001-09-27 14:00:00 & 2017-10-16 23:45:00 & 88.43\\ -\hline -VENT & VENT\_E6L5 & 6631 & 2001-09-27 14:00:00 & 2017-10-24 20:15:00 & 3.95\\ -\hline -\end{tabular} -\end{table} - -Do a duplicate check by feeder\_ID, dateTime \& kW. In theory there -should not be any. - -\begin{Shaded} -\begin{Highlighting}[] -\KeywordTok{message}\NormalTok{(}\StringTok{"Original data nrows: "}\NormalTok{, }\KeywordTok{tidyNum}\NormalTok{(}\KeywordTok{nrow}\NormalTok{(origDataDT)))} - -\KeywordTok{message}\NormalTok{(}\StringTok{"Unique data nrows: "}\NormalTok{, }\KeywordTok{tidyNum}\NormalTok{(}\KeywordTok{nrow}\NormalTok{(uniqDataDT)))} - -\KeywordTok{message}\NormalTok{(}\StringTok{"So we have "}\NormalTok{, }\KeywordTok{tidyNum}\NormalTok{(}\KeywordTok{nrow}\NormalTok{(origDataDT) }\OperatorTok{-}\StringTok{ }\KeywordTok{nrow}\NormalTok{(uniqDataDT)), }\StringTok{" duplicates..."}\NormalTok{)} - -\NormalTok{pc <-}\StringTok{ }\DecValTok{100}\OperatorTok{*}\NormalTok{((}\KeywordTok{nrow}\NormalTok{(origDataDT) }\OperatorTok{-}\StringTok{ }\KeywordTok{nrow}\NormalTok{(uniqDataDT))}\OperatorTok{/}\KeywordTok{nrow}\NormalTok{(origDataDT))} -\KeywordTok{message}\NormalTok{(}\StringTok{"That's "}\NormalTok{, }\KeywordTok{round}\NormalTok{(pc,}\DecValTok{2}\NormalTok{), }\StringTok{"%"}\NormalTok{)} - -\NormalTok{feederDT <-}\StringTok{ }\NormalTok{uniqDataDT[}\OperatorTok{!}\KeywordTok{is.na}\NormalTok{(rDateTime)] }\CommentTok{# use dt with no duplicates} -\NormalTok{origDataDT <-}\StringTok{ }\OtherTok{NULL} \CommentTok{# save memory} -\end{Highlighting} -\end{Shaded} - -So we remove the duplicates\ldots{} - -\section{Basic patterns}\label{basic-patterns} - -Try aggregated demand profiles of mean kW by season 
and feeder and day -of the week\ldots{} Remove the legend so we can see the plot. - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{plotDT <-}\StringTok{ }\NormalTok{feederDT[, .(}\DataTypeTok{meankW =} \KeywordTok{mean}\NormalTok{(kW),} - \DataTypeTok{nObs =}\NormalTok{ .N), keyby =}\StringTok{ }\NormalTok{.(rTime, season, feeder_ID, rDoW)]} - -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(plotDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x =}\NormalTok{ rTime, }\DataTypeTok{y =}\NormalTok{ meankW, }\DataTypeTok{colour =}\NormalTok{ feeder_ID)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom_line}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\StringTok{"none"}\NormalTok{) }\OperatorTok{+}\StringTok{ }\CommentTok{# remove legend so we can see the plot} -\StringTok{ }\KeywordTok{facet_grid}\NormalTok{(season }\OperatorTok{~}\StringTok{ }\NormalTok{rDoW)} -\end{Highlighting} -\end{Shaded} - -\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/kwProfiles-1.pdf} - -Is that what we expect? 
- -\section{Test for missing}\label{test-for-missing} - -Number of observations per feeder per day - gaps will be visible -(totally missing days) as will low counts (partially missing days) - we -would expect 24 * 4\ldots{} Convert this to a \% of expected\ldots{} - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{plotDT <-}\StringTok{ }\NormalTok{feederDT[, .(}\DataTypeTok{nObs =}\NormalTok{ .N), keyby =}\StringTok{ }\NormalTok{.(rDate, feeder_ID)]} -\NormalTok{plotDT[, propExpected }\OperatorTok{:}\ErrorTok{=}\StringTok{ }\NormalTok{nObs}\OperatorTok{/}\NormalTok{(}\DecValTok{24}\OperatorTok{*}\DecValTok{4}\NormalTok{)]} - -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(plotDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x =}\NormalTok{ rDate, }\DataTypeTok{y =}\NormalTok{ feeder_ID, }\DataTypeTok{fill =} \DecValTok{100}\OperatorTok{*}\NormalTok{propExpected)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom_tile}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale_x_date}\NormalTok{(}\DataTypeTok{date_breaks =} \StringTok{"3 months"}\NormalTok{, }\DataTypeTok{date_labels =} \StringTok{"%B %Y"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{axis.text.x=}\KeywordTok{element_text}\NormalTok{(}\DataTypeTok{angle=}\DecValTok{90}\NormalTok{, }\DataTypeTok{hjust=}\DecValTok{1}\NormalTok{)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\StringTok{"bottom"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale_fill_viridis_c}\NormalTok{(}\DataTypeTok{name=}\StringTok{"% expected"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/basicCountTile-1.pdf} - -This is not good. There are both gaps (missing days) and partial days. -\textbf{Lots} of partial days. Why is the data relatively good up to the -end of 2003? 
- -What does it look like if we aggregate across all feeders by time? There -are 78 feeders so we should get this many at best How close do we get? - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{plotDT <-}\StringTok{ }\NormalTok{feederDT[, .(}\DataTypeTok{nObs =}\NormalTok{ .N,} - \DataTypeTok{meankW =} \KeywordTok{mean}\NormalTok{(kW)), keyby =}\StringTok{ }\NormalTok{.(rTime, rDate, season)]} - -\NormalTok{plotDT[, propExpected }\OperatorTok{:}\ErrorTok{=}\StringTok{ }\NormalTok{nObs}\OperatorTok{/}\KeywordTok{uniqueN}\NormalTok{(feederDT}\OperatorTok{$}\NormalTok{feeder_ID)] }\CommentTok{# we now have all feeders per time so...} - -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(plotDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x =}\NormalTok{ rDate, }\DataTypeTok{y =}\NormalTok{ rTime, }\DataTypeTok{fill =} \DecValTok{100}\OperatorTok{*}\NormalTok{propExpected)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom_tile}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale_x_date}\NormalTok{(}\DataTypeTok{date_breaks =} \StringTok{"6 months"}\NormalTok{, }\DataTypeTok{date_labels =} \StringTok{"%B %Y"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{axis.text.x=}\KeywordTok{element_text}\NormalTok{(}\DataTypeTok{angle=}\DecValTok{90}\NormalTok{, }\DataTypeTok{hjust=}\DecValTok{1}\NormalTok{)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\StringTok{"bottom"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale_fill_viridis_c}\NormalTok{(}\DataTypeTok{name=}\StringTok{"% expected"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/aggVisN-1.pdf} - -That really doesn't look too good. There are some very odd fluctuations -in there. And something changed after 2003\ldots{} - -What do the mean kw patterns look like per feeder per day? 
- -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{plotDT <-}\StringTok{ }\NormalTok{feederDT[, .(}\DataTypeTok{meankW =} \KeywordTok{mean}\NormalTok{(kW, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{)), keyby =}\StringTok{ }\NormalTok{.(rDate, feeder_ID)]} - -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(plotDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x =}\NormalTok{ rDate, }\DataTypeTok{y =}\NormalTok{ feeder_ID, }\DataTypeTok{fill =}\NormalTok{ meankW)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom_tile}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale_x_date}\NormalTok{(}\DataTypeTok{date_breaks =} \StringTok{"3 months"}\NormalTok{, }\DataTypeTok{date_labels =} \StringTok{"%B %Y"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{axis.text.x=}\KeywordTok{element_text}\NormalTok{(}\DataTypeTok{angle=}\DecValTok{90}\NormalTok{, }\DataTypeTok{hjust=}\DecValTok{1}\NormalTok{)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\StringTok{"bottom"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale_fill_viridis_c}\NormalTok{(}\DataTypeTok{name=}\StringTok{"Mean kW"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/basickWTile-1.pdf} - -Missing data is even more clearly visible. - -What about mean kw across all feeders? 
- -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{plotDT <-}\StringTok{ }\NormalTok{feederDT[, .(}\DataTypeTok{nObs =}\NormalTok{ .N,} - \DataTypeTok{meankW =} \KeywordTok{mean}\NormalTok{(kW)), keyby =}\StringTok{ }\NormalTok{.(rTime, rDate, season)]} - -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(plotDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x =}\NormalTok{ rDate, }\DataTypeTok{y =}\NormalTok{ rTime, }\DataTypeTok{fill =}\NormalTok{ meankW)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom_tile}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale_x_date}\NormalTok{(}\DataTypeTok{date_breaks =} \StringTok{"6 months"}\NormalTok{, }\DataTypeTok{date_labels =} \StringTok{"%B %Y"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{axis.text.x=}\KeywordTok{element_text}\NormalTok{(}\DataTypeTok{angle=}\DecValTok{90}\NormalTok{, }\DataTypeTok{hjust=}\DecValTok{1}\NormalTok{)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{theme}\NormalTok{(}\DataTypeTok{legend.position=}\StringTok{"bottom"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale_fill_viridis_c}\NormalTok{(}\DataTypeTok{name=}\StringTok{"kW"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/aggViskW-1.pdf} - -\section{\texorpdfstring{Which days have the `least' -missing?}{Which days have the least missing?}}\label{which-days-have-the-least-missing} - -This is quite tricky as we may have completely missing dateTimes. But we -can test for this by counting the number of observations per dateTime -and then seeing if the dateTimes are contiguous. 
- -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{dateTimesDT <-}\StringTok{ }\NormalTok{feederDT[, .(}\DataTypeTok{nFeeders =} \KeywordTok{uniqueN}\NormalTok{(feeder_ID),} - \DataTypeTok{meankW =} \KeywordTok{mean}\NormalTok{(kW, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{)), } -\NormalTok{ keyby =}\StringTok{ }\NormalTok{.(rDateTime, rTime, rDate, season)] }\CommentTok{# keep season} -\NormalTok{dateTimesDT[, dtDiff }\OperatorTok{:}\ErrorTok{=}\StringTok{ }\NormalTok{rDateTime }\OperatorTok{-}\StringTok{ }\KeywordTok{shift}\NormalTok{(rDateTime)] }\CommentTok{# should be 15 mins} - - -\KeywordTok{summary}\NormalTok{(dateTimesDT)} -\end{Highlighting} -\end{Shaded} - -\begin{verbatim} -## rDateTime rTime rDate -## Min. :2001-09-11 15:15:00 Length:549529 Min. :2001-09-11 -## 1st Qu.:2006-02-13 01:30:00 Class1:hms 1st Qu.:2006-02-13 -## Median :2010-01-20 06:00:00 Class2:difftime Median :2010-01-20 -## Mean :2010-01-05 16:51:28 Mode :numeric Mean :2010-01-05 -## 3rd Qu.:2013-12-22 21:30:00 3rd Qu.:2013-12-22 -## Max. :2020-12-31 07:15:00 Max. :2020-12-31 -## -## season nFeeders meankW dtDiff -## Spring:137919 Min. : 1.00 Min. : 0.00 Length:549529 -## Summer:132245 1st Qu.:31.00 1st Qu.: 80.41 Class :difftime -## Autumn:141490 Median :39.00 Median : 96.95 Mode :numeric -## Winter:137875 Mean :39.72 Mean : 97.00 -## 3rd Qu.:47.00 3rd Qu.:113.00 -## Max. :77.00 Max. :439.56 -## NA's :1 -\end{verbatim} - -Let's see how many unique feeders we have per dateTime. Surely we have -at least one sending data each half-hour? 
- -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(dateTimesDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x =}\NormalTok{ rDate, }\DataTypeTok{y =}\NormalTok{ rTime, }\DataTypeTok{fill =}\NormalTok{ nFeeders)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom_tile}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{scale_fill_viridis_c}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{caption =} \StringTok{"Number of unique feeders in each dateTime"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/tileFeeders-1.pdf} - -No. As we suspected from the previous plots, we clearly have some -dateTimes where we have no data \emph{at all}! - -Are there time of day patterns? It looks like it\ldots{} - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{dateTimesDT[, rYear }\OperatorTok{:}\ErrorTok{=}\StringTok{ }\NormalTok{lubridate}\OperatorTok{::}\KeywordTok{year}\NormalTok{(rDateTime)]} -\NormalTok{plotDT <-}\StringTok{ }\NormalTok{dateTimesDT[, .(}\DataTypeTok{meanN =} \KeywordTok{mean}\NormalTok{(nFeeders),} - \DataTypeTok{meankW =} \KeywordTok{mean}\NormalTok{(meankW)), keyby =}\StringTok{ }\NormalTok{.(rTime, season, rYear)]} - -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(plotDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{y =}\NormalTok{ meanN, }\DataTypeTok{x =}\NormalTok{ rTime, }\DataTypeTok{colour =}\NormalTok{ season)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom_line}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{facet_wrap}\NormalTok{(rYear }\OperatorTok{~}\StringTok{ }\NormalTok{.) 
}\OperatorTok{+} -\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{y =} \StringTok{"Mean n feeders reporting"}\NormalTok{,} - \DataTypeTok{caption =} \StringTok{"Mean n feeders by time of day"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/missingProfiles-1.pdf} - -Oh yes. After 2003. Why? - -What about the kW? - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(plotDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{y =}\NormalTok{ meankW, }\DataTypeTok{x =}\NormalTok{ rTime, }\DataTypeTok{colour =}\NormalTok{ season)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom_line}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{facet_wrap}\NormalTok{(rYear }\OperatorTok{~}\StringTok{ }\NormalTok{.) }\OperatorTok{+} -\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{y =} \StringTok{"Mean kw reporting"}\NormalTok{,} - \DataTypeTok{caption =} \StringTok{"Mean kw by time of day"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/kWProfiles-1.pdf} - -Those look as we'd expect. But do we see a correlation between the -number of observations per hour and the mean kW after 2003? There is a -suspicion that as mean kw goes up so do the number of observations per -hour\ldots{} although this could just be a correlation with low demand -periods (night time?) 
- -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(plotDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{y =}\NormalTok{ meankW, }\DataTypeTok{x =}\NormalTok{ meanN, }\DataTypeTok{colour =}\NormalTok{ season)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom_point}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{facet_wrap}\NormalTok{(rYear }\OperatorTok{~}\StringTok{ }\NormalTok{.) }\OperatorTok{+} -\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{y =} \StringTok{"Mean kw per quarter hour"}\NormalTok{,} - \DataTypeTok{x =} \StringTok{"Mean number feeders reporting"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/compareProfiles-1.pdf} - -Yes. The higher the kW, the more observations we get from 2004 onwards. -Why? - -It is distinctly odd that after 2003: - -\begin{itemize} -\tightlist -\item - we appear to have the most feeders reporting data at `peak' times -\item - we have a lot of missing dateTimes between 00:30 and 05:00 -\end{itemize} - -If the monitors were set to only collect data when the power (or Wh in a -given time frame) was above a given threshold then it would look like -this\ldots{} That wouldn't happen\ldots{} would it? - -\section{\texorpdfstring{Selecting the `best' -days}{Selecting the best days}}\label{selecting-the-best-days} - -Here we use a wide form of the feeder data which has each feeder as a -column. - -We should have 78 feeders. We want to find days when all of these -feeders have complete data. 
- -The wide dataset has a count of NAs per row (dateTime) from which we -infer how many feeders are reporting: - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{wDT <-}\StringTok{ }\NormalTok{drake}\OperatorTok{::}\KeywordTok{readd}\NormalTok{(wideData) }\CommentTok{# back from the drake} -\KeywordTok{names}\NormalTok{(wDT)} -\end{Highlighting} -\end{Shaded} - -\begin{verbatim} -## [1] "rDateTime" "ARRN_ARRN" "BINS_C1T0" -## [4] "BINS_C2T0" "BINS_E1L5" "BINS_E2L5" -## [7] "BINS_E3L5" "FFPV_FFPV" "FRES_E1L5" -## [10] "FRES_E1T0" "FRES_E2L5" "FRES_E2T0" -## [13] "FRES_E3L5" "FRES_E4L5" "FRES_E6L5" -## [16] "NEWP_E11L5" "NEWP_E13L5" "NEWP_E15L5" -## [19] "NEWP_E17L5" "NEWP_E19L5" "NEWP_E1L5" -## [22] "NEWP_E1T0" "NEWP_E2L5" "NEWP_E2T0" -## [25] "NEWP_E3L5" "NEWP_E3T0" "NEWP_E4L5" -## [28] "NEWP_E5L5" "NEWP_E6L5" "NEWP_E7L5" -## [31] "NEWP_E8L5" "NEWP_E9L5" "RYDE_E1L5" -## [34] "RYDE_E1T0 &E1S0" "RYDE_E2L5" "RYDE_E2T0" -## [37] "RYDE_E3L5" "RYDE_E4L5" "RYDE_E5L5" -## [40] "RYDE_E6L5" "RYDE_E7L5" "RYDE_E8L5" -## [43] "RYDE_E9L5" "SADO_E1L5" "SADO_E1T0" -## [46] "SADO_E2L5" "SADO_E2T0" "SADO_E3L5" -## [49] "SADO_E4L5" "SADO_E5L5" "SADO_E6L5" -## [52] "SADO_E8L5" "SHAL_C3L5" "SHAL_C4L5" -## [55] "SHAL_C5L5" "SHAL_E1L5" "SHAL_E1T0" -## [58] "SHAL_E2L5" "SHAL_E2T0" "SHAL_E3L5" -## [61] "SHAL_E4L5" "SHAN_E1L5" "SHAN_E1T0" -## [64] "SHAN_E2L5" "SHAN_E2T0" "SHAN_E3L5" -## [67] "SHAN_E4L5" "SHAN_E5L5" "SHAN_E6L5" -## [70] "SHAN_E7L5" "SHAN_E8L5" "VENT_E1L5" -## [73] "VENT_E1T0" "VENT_E2L5" "VENT_E2T0" -## [76] "VENT_E3L5" "VENT_E4L5" "VENT_E5L5" -## [79] "VENT_E6L5" "nNA" "nFeedersReporting" -\end{verbatim} - -If we take the mean of the number of feeders reporting per day (date) -then a value of 25 will indicate a day when \emph{all} feeders have -\emph{all} data (since it would be the mean of all the '25's). 
- -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{wDT <-}\StringTok{ }\KeywordTok{addSeason}\NormalTok{(wDT, }\DataTypeTok{dateVar =} \StringTok{"rDateTime"}\NormalTok{, }\DataTypeTok{h =} \StringTok{"N"}\NormalTok{)} -\NormalTok{wDT[, rDoW }\OperatorTok{:}\ErrorTok{=}\StringTok{ }\NormalTok{lubridate}\OperatorTok{::}\KeywordTok{wday}\NormalTok{(rDateTime)]} -\NormalTok{wDT[, rDate }\OperatorTok{:}\ErrorTok{=}\StringTok{ }\NormalTok{lubridate}\OperatorTok{::}\KeywordTok{date}\NormalTok{(rDateTime)]} - -\CommentTok{# how many days have all feeders sending data in all dateTimes?} - -\NormalTok{aggDT <-}\StringTok{ }\NormalTok{wDT[, .(}\DataTypeTok{meanOK =} \KeywordTok{mean}\NormalTok{(nFeedersReporting),} - \DataTypeTok{minOk =} \KeywordTok{min}\NormalTok{(nFeedersReporting),} - \DataTypeTok{maxOk =} \KeywordTok{max}\NormalTok{(nFeedersReporting),} - \DataTypeTok{sumOK =} \KeywordTok{sum}\NormalTok{(nFeedersReporting) }\CommentTok{# will have a max of n feeders * 24 hours * 4 quarter hours} -\NormalTok{),} -\NormalTok{keyby =}\StringTok{ }\NormalTok{.(rDate, season)]} - -\NormalTok{aggDT[, propExpected }\OperatorTok{:}\ErrorTok{=}\StringTok{ }\NormalTok{sumOK}\OperatorTok{/}\NormalTok{(}\KeywordTok{uniqueN}\NormalTok{(feederDT}\OperatorTok{$}\NormalTok{feeder_ID)}\OperatorTok{*}\DecValTok{24}\OperatorTok{*}\DecValTok{4}\NormalTok{)] }\CommentTok{# we expect 25*24*4} - -\KeywordTok{summary}\NormalTok{(aggDT)} -\end{Highlighting} -\end{Shaded} - -\begin{verbatim} -## rDate season meanOK minOk -## Min. :2001-09-11 Spring:1531 Min. : 1.00 Min. : 0.0 -## 1st Qu.:2006-04-21 Summer:1471 1st Qu.:34.05 1st Qu.:14.0 -## Median :2010-06-23 Autumn:1568 Median :37.38 Median :18.0 -## Mean :2010-07-24 Winter:1525 Mean :37.54 Mean :21.2 -## 3rd Qu.:2014-08-24 3rd Qu.:40.85 3rd Qu.:22.0 -## Max. :2020-12-31 Max. :63.85 Max. :62.0 -## NA's :1 -## maxOk sumOK propExpected -## Min. : 1.00 Min. : 1 Min. 
:0.0001335 -## 1st Qu.:53.00 1st Qu.:3261 1st Qu.:0.4354968 -## Median :57.00 Median :3582 Median :0.4783654 -## Mean :54.27 Mean :3581 Mean :0.4782117 -## 3rd Qu.:61.00 3rd Qu.:3916 3rd Qu.:0.5229033 -## Max. :77.00 Max. :6130 Max. :0.8186432 -## -\end{verbatim} - -\begin{Shaded} -\begin{Highlighting}[] -\KeywordTok{message}\NormalTok{(}\StringTok{"How many days have 100%?"}\NormalTok{)} -\NormalTok{n <-}\StringTok{ }\KeywordTok{nrow}\NormalTok{(aggDT[propExpected }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{])} -\NormalTok{n} -\end{Highlighting} -\end{Shaded} - -\begin{verbatim} -## [1] 0 -\end{verbatim} - -So, there are 0 days with 100\% data\ldots{} - -If we plot the mean then we will see which days get closest to having a -full dataset. - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(aggDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x =}\NormalTok{ rDate, }\DataTypeTok{colour =}\NormalTok{ season, }\DataTypeTok{y =}\NormalTok{ meanOK)) }\OperatorTok{+}\StringTok{ }\KeywordTok{geom_point}\NormalTok{()} -\end{Highlighting} -\end{Shaded} - -\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/bestDaysMean-1.pdf} - -Re-plot by the \% of expected if we assume we \emph{should} have 25 -feeders * 24 hours * 4 per hour (will be the same shape): - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{ggplot2}\OperatorTok{::}\KeywordTok{ggplot}\NormalTok{(aggDT, }\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x =}\NormalTok{ rDate, }\DataTypeTok{colour =}\NormalTok{ season, }\DataTypeTok{y =} \DecValTok{100}\OperatorTok{*}\NormalTok{propExpected)) }\OperatorTok{+}\StringTok{ }\KeywordTok{geom_point}\NormalTok{() }\OperatorTok{+} -\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{y =} \StringTok{"%"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - 
-\includegraphics{/home/ba1e12/git.Soton/ba1e12/datacleaning/docs/cleaningFeederData_allData_files/figure-latex/bestDaysProp-1.pdf} - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{aggDT[, rDoW }\OperatorTok{:}\ErrorTok{=}\StringTok{ }\NormalTok{lubridate}\OperatorTok{::}\KeywordTok{wday}\NormalTok{(rDate, }\DataTypeTok{lab =} \OtherTok{TRUE}\NormalTok{)]} -\NormalTok{h <-}\StringTok{ }\KeywordTok{head}\NormalTok{(aggDT[season }\OperatorTok{==}\StringTok{ "Spring"}\NormalTok{][}\KeywordTok{order}\NormalTok{(}\OperatorTok{-}\NormalTok{propExpected)])} -\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(h, }\DataTypeTok{caption =} \StringTok{"Best Spring days overall"}\NormalTok{, } - \DataTypeTok{digits =} \DecValTok{3}\NormalTok{) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} -\end{Highlighting} -\end{Shaded} - -\begin{table} - -\caption{\label{tab:bestDaysProp}Best Spring days overall} -\centering -\begin{tabular}[t]{l|l|r|r|r|r|r|l} -\hline -rDate & season & meanOK & minOk & maxOk & sumOK & propExpected & rDoW\\ -\hline -2002-04-14 & Spring & 63.490 & 60 & 65 & 6095 & 0.814 & Sun\\ -\hline -2003-03-20 & Spring & 63.458 & 61 & 67 & 6092 & 0.814 & Thu\\ -\hline -2002-03-21 & Spring & 63.385 & 62 & 65 & 6085 & 0.813 & Thu\\ -\hline -2003-03-14 & Spring & 63.385 & 61 & 65 & 6085 & 0.813 & Fri\\ -\hline -2003-03-16 & Spring & 63.375 & 60 & 65 & 6084 & 0.812 & Sun\\ -\hline -2003-03-17 & Spring & 63.375 & 61 & 65 & 6084 & 0.812 & Mon\\ -\hline -\end{tabular} -\end{table} - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{h <-}\StringTok{ }\KeywordTok{head}\NormalTok{(aggDT[season }\OperatorTok{==}\StringTok{ "Summer"}\NormalTok{][}\KeywordTok{order}\NormalTok{(}\OperatorTok{-}\NormalTok{propExpected)])} -\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(h, }\DataTypeTok{caption =} \StringTok{"Best Summer days overall"}\NormalTok{, } - \DataTypeTok{digits =} \DecValTok{3}\NormalTok{) 
}\OperatorTok{%>%} -\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} -\end{Highlighting} -\end{Shaded} - -\begin{table} - -\caption{\label{tab:bestDaysProp}Best Summer days overall} -\centering -\begin{tabular}[t]{l|l|r|r|r|r|r|l} -\hline -rDate & season & meanOK & minOk & maxOk & sumOK & propExpected & rDoW\\ -\hline -2003-08-22 & Summer & 63.854 & 56 & 65 & 6130 & 0.819 & Fri\\ -\hline -2003-08-30 & Summer & 63.844 & 60 & 66 & 6129 & 0.819 & Sat\\ -\hline -2003-08-31 & Summer & 63.812 & 59 & 66 & 6126 & 0.818 & Sun\\ -\hline -2003-08-23 & Summer & 63.677 & 56 & 66 & 6113 & 0.816 & Sat\\ -\hline -2003-08-25 & Summer & 63.677 & 55 & 66 & 6113 & 0.816 & Mon\\ -\hline -2003-08-26 & Summer & 63.656 & 58 & 66 & 6111 & 0.816 & Tue\\ -\hline -\end{tabular} -\end{table} - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{h <-}\StringTok{ }\KeywordTok{head}\NormalTok{(aggDT[season }\OperatorTok{==}\StringTok{ "Autumn"}\NormalTok{][}\KeywordTok{order}\NormalTok{(}\OperatorTok{-}\NormalTok{propExpected)])} -\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(h, }\DataTypeTok{caption =} \StringTok{"Best Autumn days overall"}\NormalTok{,} - \DataTypeTok{digits =} \DecValTok{3}\NormalTok{) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} -\end{Highlighting} -\end{Shaded} - -\begin{table} - -\caption{\label{tab:bestDaysProp}Best Autumn days overall} -\centering -\begin{tabular}[t]{l|l|r|r|r|r|r|l} -\hline -rDate & season & meanOK & minOk & maxOk & sumOK & propExpected & rDoW\\ -\hline -2003-09-02 & Autumn & 63.823 & 57 & 66 & 6127 & 0.818 & Tue\\ -\hline -2003-09-01 & Autumn & 63.771 & 56 & 65 & 6122 & 0.818 & Mon\\ -\hline -2003-09-07 & Autumn & 63.740 & 57 & 66 & 6119 & 0.817 & Sun\\ -\hline -2003-09-03 & Autumn & 63.667 & 57 & 65 & 6112 & 0.816 & Wed\\ -\hline -2003-09-04 & Autumn & 63.615 & 57 & 66 & 6107 & 0.816 & Thu\\ -\hline -2003-09-06 & Autumn & 63.552 & 57 & 65 & 6101 & 0.815 & Sat\\ -\hline -\end{tabular} -\end{table} - 
-\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{h <-}\StringTok{ }\KeywordTok{head}\NormalTok{(aggDT[season }\OperatorTok{==}\StringTok{ "Winter"}\NormalTok{][}\KeywordTok{order}\NormalTok{(}\OperatorTok{-}\NormalTok{propExpected)])} -\NormalTok{kableExtra}\OperatorTok{::}\KeywordTok{kable}\NormalTok{(h, }\DataTypeTok{caption =} \StringTok{"Best Winter days overall"}\NormalTok{, } - \DataTypeTok{digits =} \DecValTok{3}\NormalTok{) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{kable_styling}\NormalTok{()} -\end{Highlighting} -\end{Shaded} - -\begin{table} - -\caption{\label{tab:bestDaysProp}Best Winter days overall} -\centering -\begin{tabular}[t]{l|l|r|r|r|r|r|l} -\hline -rDate & season & meanOK & minOk & maxOk & sumOK & propExpected & rDoW\\ -\hline -2002-02-28 & Winter & 63.292 & 60 & 65 & 6076 & 0.811 & Thu\\ -\hline -2002-02-25 & Winter & 63.125 & 61 & 65 & 6060 & 0.809 & Mon\\ -\hline -2002-12-11 & Winter & 62.979 & 61 & 64 & 6046 & 0.807 & Wed\\ -\hline -2002-12-01 & Winter & 62.917 & 61 & 64 & 6040 & 0.807 & Sun\\ -\hline -2003-01-01 & Winter & 62.906 & 60 & 64 & 6039 & 0.806 & Wed\\ -\hline -2003-01-03 & Winter & 62.906 & 60 & 64 & 6039 & 0.806 & Fri\\ -\hline -\end{tabular} -\end{table} - -This also tells us that there is some reason why we get fluctuations in -the number of data points per hour after 2003. - -\section{Summary}\label{summary} - -So there are no days with 100\% data. We need a different approach. - -\section{Runtime}\label{runtime} - -Analysis completed in 221.35 seconds ( 3.69 minutes) using -\href{https://cran.r-project.org/package=knitr}{knitr} in -\href{http://www.rstudio.com}{RStudio} with R version 3.6.0 (2019-04-26) -running on x86\_64-redhat-linux-gnu. - -\section{R environment}\label{r-environment} - -\subsection{R packages used}\label{r-packages-used} - -\begin{itemize} -\tightlist -\item - base R (R Core Team 2016) -\item - bookdown (Xie 2018) -\item - data.table (Dowle et al. 
2015) -\item - ggplot2 (Wickham 2009) -\item - kableExtra (Zhu 2019) -\item - knitr (Xie 2016) -\item - lubridate (Grolemund and Wickham 2011) -\item - rmarkdown (Allaire et al. 2020) -\item - skimr (Arino de la Rubia et al. 2017) -\end{itemize} - -\subsection{Session info}\label{session-info} - -\begin{verbatim} -## R version 3.6.0 (2019-04-26) -## Platform: x86_64-redhat-linux-gnu (64-bit) -## Running under: Red Hat Enterprise Linux -## -## Matrix products: default -## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so -## -## locale: -## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C -## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8 -## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8 -## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C -## [9] LC_ADDRESS=C LC_TELEPHONE=C -## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C -## -## attached base packages: -## [1] stats graphics grDevices utils datasets methods base -## -## other attached packages: -## [1] kableExtra_1.1.0 skimr_2.1.1 ggplot2_3.3.1 hms_0.5.3 -## [5] lubridate_1.7.9 here_0.1 drake_7.12.2 data.table_1.12.0 -## [9] dataCleaning_0.1.0 -## -## loaded via a namespace (and not attached): -## [1] storr_1.2.1 progress_1.2.2 tidyselect_1.1.0 xfun_0.14 -## [5] repr_1.1.0 purrr_0.3.4 colorspace_1.4-0 vctrs_0.3.1 -## [9] generics_0.0.2 viridisLite_0.3.0 htmltools_0.3.6 yaml_2.2.0 -## [13] base64enc_0.1-3 rlang_0.4.6 pillar_1.4.4 txtq_0.2.0 -## [17] glue_1.4.1 withr_2.1.2 lifecycle_0.2.0 stringr_1.4.0 -## [21] munsell_0.5.0 gtable_0.2.0 rvest_0.3.5 evaluate_0.14 -## [25] labeling_0.3 knitr_1.28 parallel_3.6.0 fansi_0.4.0 -## [29] Rcpp_1.0.1 readr_1.3.1 scales_1.0.0 backports_1.1.3 -## [33] filelock_1.0.2 webshot_0.5.2 jsonlite_1.6 digest_0.6.25 -## [37] stringi_1.2.4 dplyr_1.0.0 grid_3.6.0 rprojroot_1.3-2 -## [41] cli_2.0.2 tools_3.6.0 magrittr_1.5 base64url_1.4 -## [45] tibble_3.0.1 crayon_1.3.4 pkgconfig_2.0.2 ellipsis_0.3.1 -## [49] xml2_1.3.2 prettyunits_1.0.2 httr_1.4.1 assertthat_0.2.0 -## [53] rmarkdown_2.2 rstudioapi_0.11 
R6_2.3.0 igraph_1.2.2 -## [57] compiler_3.6.0 -\end{verbatim} - -\section{The raw data cleaning code}\label{the-raw-data-cleaning-code} - -\begin{enumerate} -\def\labelenumi{(\alph{enumi})} -\setcounter{enumi}{2} -\tightlist -\item - Mikey Harper :-) -\end{enumerate} - -Starts here: - -Scripts used to clean and merge substation data. - -\subsection{Input files}\label{input-files} - -Analysis will first look at the primary data. There are different types -of files which refer to different parameters. Different search terms are -used to extract these: - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{# Find files with AMPS. Exclude files which contain DI~CO} -\NormalTok{files_AMPS <-}\StringTok{ }\KeywordTok{list.files}\NormalTok{(}\StringTok{"../Primary"}\NormalTok{, }\DataTypeTok{recursive =}\NormalTok{ T, }\DataTypeTok{pattern =} \StringTok{"~AMPS"}\NormalTok{, }\DataTypeTok{full.names =}\NormalTok{ T) }\OperatorTok{%>%} -\StringTok{ }\NormalTok{.[}\OperatorTok{!}\NormalTok{stringr}\OperatorTok{::}\KeywordTok{str_detect}\NormalTok{ (., }\StringTok{"DI~CO"}\NormalTok{)]} - -\NormalTok{files_AMPS} -\end{Highlighting} -\end{Shaded} - -\subsection{Process Amps}\label{process-amps} - -\begin{Shaded} -\begin{Highlighting}[] -\CommentTok{# Show a sample} -\NormalTok{fileSelect <-}\StringTok{ }\NormalTok{files_AMPS[}\DecValTok{4}\NormalTok{]} -\KeywordTok{head}\NormalTok{(}\KeywordTok{read_csv}\NormalTok{(fileSelect, }\DataTypeTok{skip =} \DecValTok{3}\NormalTok{))} -\end{Highlighting} -\end{Shaded} - -Again a function is used to do all the processing on the input CSVs. -This is slightly amended from the \texttt{processkV} function. 
- -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{processAMPS <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(filePath, }\DataTypeTok{databaseCon =}\NormalTok{ con)\{} - - \KeywordTok{message}\NormalTok{(}\StringTok{"Processing "}\NormalTok{, filePath)} - - \CommentTok{# 1st Level} -\NormalTok{ dirName_}\DecValTok{1}\NormalTok{ <-}\StringTok{ }\NormalTok{filePath }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\KeywordTok{dirname}\NormalTok{() }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\NormalTok{basename} - - \CommentTok{# 2nd Level} -\NormalTok{ dirName_}\DecValTok{2}\NormalTok{ <-}\StringTok{ }\NormalTok{filePath }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\KeywordTok{dirname}\NormalTok{() }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\KeywordTok{dirname}\NormalTok{() }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\NormalTok{basename} - - \ControlFlowTok{if}\NormalTok{ (dirName_}\DecValTok{2} \OperatorTok{==}\StringTok{ "Primary"}\NormalTok{)\{} -\NormalTok{ dirName_}\DecValTok{2}\NormalTok{ <-}\StringTok{ }\NormalTok{dirName_}\DecValTok{1} -\NormalTok{ dirName_}\DecValTok{1}\NormalTok{ <-}\StringTok{ ""} -\NormalTok{ \}} - - \CommentTok{# Load the CSV. There were some tab seperated files which are saved as CSVs, which confuse the search. 
There if the data is loaded incorrectly (only having a single column), the code will try and load it as a TSV.} -\NormalTok{ dataLoaded <-}\StringTok{ }\KeywordTok{suppressWarnings}\NormalTok{(}\KeywordTok{read_csv}\NormalTok{(filePath, }\DataTypeTok{skip =} \DecValTok{3}\NormalTok{, }\DataTypeTok{col_types =} \KeywordTok{cols}\NormalTok{(}\DataTypeTok{Value =} \KeywordTok{col_number}\NormalTok{())))} - \ControlFlowTok{if}\NormalTok{(}\KeywordTok{ncol}\NormalTok{(dataLoaded) }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{)\{} -\NormalTok{ dataLoaded <-}\StringTok{ }\KeywordTok{suppressWarnings}\NormalTok{(}\KeywordTok{read_tsv}\NormalTok{(filePath, }\DataTypeTok{skip =} \DecValTok{3}\NormalTok{, }\DataTypeTok{col_types =} \KeywordTok{cols}\NormalTok{()))} -\NormalTok{ \}} - - \CommentTok{# Reformat data} -\NormalTok{ dataLoaded <-} -\StringTok{ }\NormalTok{dataLoaded }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{mutate_at}\NormalTok{(}\KeywordTok{vars}\NormalTok{(Time), }\ControlFlowTok{function}\NormalTok{(x)\{}\KeywordTok{gsub}\NormalTok{(}\StringTok{'[^ -~]'}\NormalTok{, }\StringTok{''}\NormalTok{, x)\}) }\OperatorTok{%>%}\StringTok{ }\CommentTok{# Remove invalid UTF characters} -\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{Time =}\NormalTok{ lubridate}\OperatorTok{::}\KeywordTok{dmy_hms}\NormalTok{(Time),} - \DataTypeTok{Time =}\NormalTok{ lubridate}\OperatorTok{::}\KeywordTok{floor_date}\NormalTok{(Time, }\DataTypeTok{unit =} \StringTok{"15 minutes"}\NormalTok{)) }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\KeywordTok{group_by}\NormalTok{(Time) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{Value =} \KeywordTok{mean}\NormalTok{(Value, }\DataTypeTok{na.rm =}\NormalTok{ T)) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{region =}\NormalTok{ dirName_}\DecValTok{2}\NormalTok{,} - \DataTypeTok{sub_region =}\NormalTok{ dirName_}\DecValTok{1} -\NormalTok{ )} - - \CommentTok{# There 
are some datasets which contain no values, whch can cause errors in running} - \CommentTok{# If this happens, return NULL} - \ControlFlowTok{if}\NormalTok{(}\KeywordTok{is.character}\NormalTok{(dataLoaded}\OperatorTok{$}\NormalTok{Value)) }\KeywordTok{return}\NormalTok{(}\OtherTok{NULL}\NormalTok{)} - - \KeywordTok{return}\NormalTok{(dataLoaded)} -\NormalTok{\}} -\end{Highlighting} -\end{Shaded} - -Run the function below: - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{Amps <-}\StringTok{ }\NormalTok{purrr}\OperatorTok{::}\KeywordTok{map_df}\NormalTok{(files_AMPS, processAMPS)} -\end{Highlighting} -\end{Shaded} - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{Amps_stats <-}\StringTok{ }\NormalTok{Amps }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{group_by}\NormalTok{(region) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{mean =}\NormalTok{ (}\KeywordTok{mean}\NormalTok{(Value, }\DataTypeTok{na.rm =}\NormalTok{ T)),} - \DataTypeTok{n =} \KeywordTok{n}\NormalTok{(),} - \DataTypeTok{sd =} \KeywordTok{sd}\NormalTok{(Value, }\DataTypeTok{na.rm =}\NormalTok{ T),} - \DataTypeTok{var =} \KeywordTok{var}\NormalTok{(Value, }\DataTypeTok{na.rm =}\NormalTok{ T))} - -\NormalTok{Amps_stats} - -\NormalTok{readr}\OperatorTok{::}\KeywordTok{write_csv}\NormalTok{(Amps_stats, }\DataTypeTok{path =} \StringTok{"../Amps_stats.csv"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\begin{Shaded} -\begin{Highlighting}[] -\KeywordTok{ggplot}\NormalTok{(Amps) }\OperatorTok{+} -\StringTok{ }\KeywordTok{geom_point}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{x =}\NormalTok{ Time, }\DataTypeTok{y =}\NormalTok{ Value, }\DataTypeTok{colour =}\NormalTok{ region)) }\OperatorTok{+} -\StringTok{ }\KeywordTok{facet_grid}\NormalTok{(region}\OperatorTok{~}\NormalTok{., }\DataTypeTok{scales =} \StringTok{"free_y"}\NormalTok{) }\OperatorTok{+} -\StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{title =} \StringTok{"Cleaned data for 
Amps"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\subsection{Processing data}\label{processing-data} - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{readr}\OperatorTok{::}\KeywordTok{write_csv}\NormalTok{(Amps, }\DataTypeTok{path =} \StringTok{"amps_all_substations.csv"}\NormalTok{)} -\end{Highlighting} -\end{Shaded} - -\begin{Shaded} -\begin{Highlighting}[] -\KeywordTok{library}\NormalTok{(odbc)} - -\KeywordTok{library}\NormalTok{(DBI)} -\CommentTok{# Create an ephemeral in-memory RSQLite database} -\NormalTok{con <-}\StringTok{ }\KeywordTok{dbConnect}\NormalTok{(RSQLite}\OperatorTok{::}\KeywordTok{SQLite}\NormalTok{(), }\StringTok{"amps.sqlite"}\NormalTok{)} - -\KeywordTok{dbListTables}\NormalTok{(con)} - - -\KeywordTok{dbWriteTable}\NormalTok{(con, }\StringTok{"amps"}\NormalTok{, Amps)} -\KeywordTok{dbListTables}\NormalTok{(con)} -\end{Highlighting} -\end{Shaded} - -\subsection{Querying the data}\label{querying-the-data} - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{con <-}\StringTok{ }\KeywordTok{dbConnect}\NormalTok{(RSQLite}\OperatorTok{::}\KeywordTok{SQLite}\NormalTok{(), }\StringTok{"amps.sqlite"}\NormalTok{)} - - -\KeywordTok{library}\NormalTok{(dbplyr)} - -\NormalTok{Amps_db <-}\StringTok{ }\KeywordTok{tbl}\NormalTok{(con, }\StringTok{"amps"}\NormalTok{)} - - -\NormalTok{flights_db }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{group_by}\NormalTok{(region) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{mean =}\NormalTok{ (}\KeywordTok{mean}\NormalTok{(Value, }\DataTypeTok{na.rm =}\NormalTok{ T)),} - \DataTypeTok{n =} \KeywordTok{n}\NormalTok{(),} - \DataTypeTok{sd =} \KeywordTok{sd}\NormalTok{(Value, }\DataTypeTok{na.rm =}\NormalTok{ T),} - \DataTypeTok{var =} \KeywordTok{var}\NormalTok{(Value, }\DataTypeTok{na.rm =}\NormalTok{ T))} -\end{Highlighting} -\end{Shaded} - -\subsection{Round to Nearest N -minutes}\label{round-to-nearest-n-minutes} - -\begin{Shaded} -\begin{Highlighting}[] 
-\NormalTok{processAMPS_5mins <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(filePath)\{} - - \KeywordTok{message}\NormalTok{(}\StringTok{"Processing "}\NormalTok{, filePath)} - - \CommentTok{# 1st Level} -\NormalTok{ dirName_}\DecValTok{1}\NormalTok{ <-}\StringTok{ }\NormalTok{filePath }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\KeywordTok{dirname}\NormalTok{() }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\NormalTok{basename} - - \CommentTok{# 2nd Level} -\NormalTok{ dirName_}\DecValTok{2}\NormalTok{ <-}\StringTok{ }\NormalTok{filePath }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\KeywordTok{dirname}\NormalTok{() }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\KeywordTok{dirname}\NormalTok{() }\OperatorTok{%>%}\StringTok{ } -\StringTok{ }\NormalTok{basename} - - \ControlFlowTok{if}\NormalTok{ (dirName_}\DecValTok{2} \OperatorTok{==}\StringTok{ "Primary"}\NormalTok{)\{} -\NormalTok{ dirName_}\DecValTok{2}\NormalTok{ <-}\StringTok{ }\NormalTok{dirName_}\DecValTok{1} -\NormalTok{ dirName_}\DecValTok{1}\NormalTok{ <-}\StringTok{ ""} -\NormalTok{ \}} - - \CommentTok{# Load the CSV. There were some tab seperated files which are saved as CSVs, which confuse the search. 
There if the data is loaded incorrectly (only having a single column), the code will try and load it as a TSV.} -\NormalTok{ dataLoaded <-}\StringTok{ }\KeywordTok{suppressWarnings}\NormalTok{(}\KeywordTok{read_csv}\NormalTok{(filePath, }\DataTypeTok{skip =} \DecValTok{3}\NormalTok{, }\DataTypeTok{col_types =} \KeywordTok{cols}\NormalTok{()))} - \ControlFlowTok{if}\NormalTok{(}\KeywordTok{ncol}\NormalTok{(dataLoaded) }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{)\{} -\NormalTok{ dataLoaded <-}\StringTok{ }\KeywordTok{suppressWarnings}\NormalTok{(}\KeywordTok{read_tsv}\NormalTok{(filePath, }\DataTypeTok{skip =} \DecValTok{3}\NormalTok{, }\DataTypeTok{col_types =} \KeywordTok{cols}\NormalTok{()))} -\NormalTok{ \}} - - \CommentTok{# Reformat data} -\NormalTok{ dataLoaded <-} -\StringTok{ }\NormalTok{dataLoaded }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{mutate_at}\NormalTok{(}\KeywordTok{vars}\NormalTok{(Time), }\ControlFlowTok{function}\NormalTok{(x)\{}\KeywordTok{gsub}\NormalTok{(}\StringTok{'[^ -~]'}\NormalTok{, }\StringTok{''}\NormalTok{, x)\}) }\OperatorTok{%>%}\StringTok{ }\CommentTok{# Remove invalid UTF characters} -\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{Time =}\NormalTok{ lubridate}\OperatorTok{::}\KeywordTok{dmy_hms}\NormalTok{(Time),} - \DataTypeTok{region =}\NormalTok{ dirName_}\DecValTok{2}\NormalTok{,} - \DataTypeTok{sub_region =}\NormalTok{ dirName_}\DecValTok{1}\NormalTok{,} - \DataTypeTok{code =} \KeywordTok{paste}\NormalTok{(region, sub_region, }\DataTypeTok{sep =} \StringTok{"_"}\NormalTok{),} -\NormalTok{ ) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{Time =}\NormalTok{ lubridate}\OperatorTok{::}\KeywordTok{floor_date}\NormalTok{(Time, }\DataTypeTok{unit =} \StringTok{"5 minutes"}\NormalTok{)) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{group_by}\NormalTok{(Time, region, code) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{Value =} 
\KeywordTok{mean}\NormalTok{(Value)) }\OperatorTok{%>%} -\StringTok{ }\KeywordTok{arrange}\NormalTok{(Time)} - - \CommentTok{# There are some datasets which contain no values, whch can cause errors in running} - \CommentTok{# If this happens, return NULL} - \ControlFlowTok{if}\NormalTok{(}\KeywordTok{is.character}\NormalTok{(dataLoaded}\OperatorTok{$}\NormalTok{Value)) }\KeywordTok{return}\NormalTok{(}\OtherTok{NULL}\NormalTok{)} - - \CommentTok{# Returns the loaded and cleaned dataframe} - \KeywordTok{return}\NormalTok{(dataLoaded)} -\NormalTok{\}} -\end{Highlighting} -\end{Shaded} - -Nearest 5 minutes: - -\begin{Shaded} -\begin{Highlighting}[] -\NormalTok{Amps_5mins <<-}\StringTok{ }\NormalTok{purrr}\OperatorTok{::}\KeywordTok{map_df}\NormalTok{(files_AMPS[}\DecValTok{1}\OperatorTok{:}\DecValTok{4}\NormalTok{], processAMPS_5mins)} -\end{Highlighting} -\end{Shaded} - -\section*{References}\label{references} -\addcontentsline{toc}{section}{References} - -\hypertarget{refs}{} -\hypertarget{ref-rmarkdown}{} -Allaire, JJ, Yihui Xie, Jonathan McPherson, Javier Luraschi, Kevin -Ushey, Aron Atkins, Hadley Wickham, Joe Cheng, Winston Chang, and -Richard Iannone. 2020. \emph{Rmarkdown: Dynamic Documents for R}. -\url{https://github.com/rstudio/rmarkdown}. - -\hypertarget{ref-skimr}{} -Arino de la Rubia, Eduardo, Hao Zhu, Shannon Ellis, Elin Waring, and -Michael Quinn. 2017. \emph{Skimr: Skimr}. -\url{https://github.com/ropenscilabs/skimr}. - -\hypertarget{ref-data.table}{} -Dowle, M, A Srinivasan, T Short, S Lianoglou with contributions from R -Saporta, and E Antonyan. 2015. \emph{Data.table: Extension of -Data.frame}. \url{https://CRAN.R-project.org/package=data.table}. - -\hypertarget{ref-lubridate}{} -Grolemund, Garrett, and Hadley Wickham. 2011. ``Dates and Times Made -Easy with lubridate.'' \emph{Journal of Statistical Software} 40 (3): -1--25. \url{http://www.jstatsoft.org/v40/i03/}. - -\hypertarget{ref-baseR}{} -R Core Team. 2016. 
\emph{R: A Language and Environment for Statistical -Computing}. Vienna, Austria: R Foundation for Statistical Computing. -\url{https://www.R-project.org/}. - -\hypertarget{ref-ggplot2}{} -Wickham, Hadley. 2009. \emph{Ggplot2: Elegant Graphics for Data -Analysis}. Springer-Verlag New York. \url{http://ggplot2.org}. - -\hypertarget{ref-knitr}{} -Xie, Yihui. 2016. \emph{Knitr: A General-Purpose Package for Dynamic -Report Generation in R}. \url{https://CRAN.R-project.org/package=knitr}. - -\hypertarget{ref-bookdown}{} ----------. 2018. \emph{Bookdown: Authoring Books and Technical Documents -with R Markdown}. \url{https://github.com/rstudio/bookdown}. - -\hypertarget{ref-kableExtra}{} -Zhu, Hao. 2019. \emph{KableExtra: Construct Complex Table with 'Kable' -and Pipe Syntax}. \url{https://CRAN.R-project.org/package=kableExtra}. - -\end{document}