From ba98a08bfe3d2bb64ad6a8ca7dff037ab4743445 Mon Sep 17 00:00:00 2001
From: Ben Anderson <dataknut@icloud.com>
Date: Fri, 3 Dec 2021 14:16:29 +0000
Subject: [PATCH] added the cats thing

---
 itsTheCatsStupid/itsTheCatsStupid.Rmd | 159 ++++++++++++++++++++++++++
 itsTheCatsStupid/makeFile.R           |  17 +++
 2 files changed, 176 insertions(+)
 create mode 100644 itsTheCatsStupid/itsTheCatsStupid.Rmd
 create mode 100644 itsTheCatsStupid/makeFile.R

diff --git a/itsTheCatsStupid/itsTheCatsStupid.Rmd b/itsTheCatsStupid/itsTheCatsStupid.Rmd
new file mode 100644
index 0000000..e13c500
--- /dev/null
+++ b/itsTheCatsStupid/itsTheCatsStupid.Rmd
@@ -0,0 +1,159 @@
+---
+params:
+  subtitle: ""
+  title: ""
+  authors: ""
+title: '`r params$title`'
+subtitle: '`r params$subtitle`'
+author: '`r params$authors`'
+date: 'Last run at: `r Sys.time()`'
+output:
+  bookdown::html_document2:
+    self_contained: true
+    fig_caption: yes
+    code_folding: hide
+    number_sections: yes
+    toc: yes
+    toc_depth: 2
+    toc_float: TRUE
+  bookdown::pdf_document2:
+    fig_caption: yes
+    number_sections: yes
+  bookdown::word_document2:
+    fig_caption: yes
+    number_sections: yes
+    toc: yes
+    toc_depth: 2
+    fig_width: 5
+bibliography: '`r path.expand("~/github/dataknut/refs/refs.bib")`'
+---
+
+<hr>
+
+>This fridayFagPacket was first published as a [blog](https://dataknut.wordpress.com/2020/10/16/retrofit-or-bust/)
+
+<hr>
+
+# fridayFagPackets
+
+Numbers that could have been done on the back of one and should probably come with a similar health warning...
+
+>Find out [more](https://dataknut.github.io/fridayFagPackets/).
+
+```{r setup, include=FALSE}
+library(data.table) # data loading, := updates and keyed joins
+library(ggplot2) # plotting
+knitr::opts_chunk$set(echo = TRUE) # echo chunk code in the output
+```
+
+# It's the cats, stupid
+Inspired by \@giulio_mattioli's [recent paper on the car dependence of dog ownership](https://twitter.com/giulio_mattioli/status/1466361022747455492) we thought we'd take a look at [cats](https://twitter.com/giulio_mattioli/status/1466710752606179331) and residential energy demand. Why? Well, people like to keep their cats warm but, more importantly, they also cut big holes in doors and/or windows to let the cats in and out. Hardly a thermally sealed envelope!
+
+# What's the data?
+
+For now we're using:
+
+ * postcode sector level estimates of cat ownership in the UK. Does such a thing exist? [YEAH](https://t.co/ZEwaB5YEHI)!
+ * LSOA/SOA level data on gas and electricity 'consumption', aggregated to postcode sectors
+
+```{r loadData}
+# Postcode lookup: pcd_sector = text of pcds before the first space; then count postcodes per sector x LSOA x LAD.
+postcodes_dt <- data.table::fread("~/Dropbox/data/UK_postcodes/PCD_OA_LSOA_MSOA_LAD_AUG20_UK_LU.csv.gz")[, pcd_sector := tstrsplit(pcds, " ", keep = 1L)]
+lsoa_DT <- postcodes_dt[, .(nPostcodes = .N), keyby = .(pcd_sector, lsoa11cd, ladnm, ladnmw)]
+
+# LSOA gas data: copy the verbose source columns to the short names used below.
+gas_dt <- data.table::fread("~/Dropbox/data/beis/subnationalGas/lsoaDom/LSOA_GAS_2019.csv.gz")
+gas_dt[, `:=`(lsoa11cd = `Lower Layer Super Output Area (LSOA) Code`,
+              mean_gas_kWh = `Mean consumption (kWh per meter)`, total_gas_kWh = `Consumption (kWh)`,
+              nGasMeters = `Number of consuming meters`)]
+
+elec_dt <- data.table::fread("~/Dropbox/data/beis/subnationalElec/lsoaDom/LSOA_ELEC_2019.csv.gz") # LSOA electricity data
+elec_dt[, lsoa11cd := `Lower Layer Super Output Area (LSOA) Code`] # short copy of the LSOA code, used as the join key
+elec_dt[, mean_elec_kWh := `Mean domestic electricity consumption 
(kWh per meter)`] # NOTE(review): this source column name appears to contain a line break - do not reflow
+elec_dt[, total_elec_kWh := `Total domestic electricity consumption (kWh)`] # short copy of the total
+elec_dt[, nElecMeters := `Total number of domestic electricity meters`] # short copy of the meter count
+
+# Key all three tables on lsoa11cd so the X[Y] joins below match rows on the LSOA code.
+setkeyv(lsoa_DT, "lsoa11cd")
+setkeyv(elec_dt, "lsoa11cd")
+setkeyv(gas_dt, "lsoa11cd")
+merged_lsoa_DT <- gas_dt[, .(lsoa11cd, mean_gas_kWh, total_gas_kWh, nGasMeters)][elec_dt[, .(lsoa11cd, mean_elec_kWh, total_elec_kWh, nElecMeters)]][lsoa_DT] # result has one row per lsoa_DT row
+
+# Report (but do not drop) the rows that have no postcode sector
+message("How many LSOAs do not map to a postcode sector?")
+merged_lsoa_DT[is.na(pcd_sector), .N]
+head(merged_lsoa_DT[is.na(pcd_sector)])
+
+# Aggregate to one row per pcd_sector x LAD; the means are unweighted means of the LSOA-level means.
+# NOTE(review): rows with a missing pcd_sector are not filtered out here (no !is.na(pcd_sector)).
+# NOTE(review): an LSOA linked to several sectors adds its totals and meter counts to each of them - confirm intended.
+postcode_sector_energy <- merged_lsoa_DT[, .(
+  nLSOAs = .N, nPostcodes = sum(nPostcodes),
+  mean_gas_kWh = mean(mean_gas_kWh, na.rm = TRUE), total_gas_kWh = sum(total_gas_kWh, na.rm = TRUE),
+  mean_elec_kWh = mean(mean_elec_kWh, na.rm = TRUE), total_elec_kWh = sum(total_elec_kWh, na.rm = TRUE),
+  nGasMeters = sum(nGasMeters, na.rm = TRUE), nElecMeters = sum(nElecMeters, na.rm = TRUE)
+), keyby = .(pcd_sector, ladnm, ladnmw)]
+head(postcode_sector_energy)
+
+# Cat estimates by postcode district; PostcodeDistrict is copied to pcd_sector to serve as the join key.
+cats_DT <- data.table::fread("~/Dropbox/data/UK_Animal and Plant Health Agency/APHA0372-Cat_Density_Postcode_District.csv")
+cats_DT[, pcd_sector := PostcodeDistrict]
+setkeyv(postcode_sector_energy, "pcd_sector")
+setkeyv(cats_DT, "pcd_sector")
+# The join keeps every energy row; cat columns are NA where no district matches.
+# NOTE(review): a district split across LADs has several energy rows, each receiving the full district cat estimate - confirm.
+pc_district <- cats_DT[postcode_sector_energy]
+
+```
+
+We could also use \@SERL_UK's [smart meter gas/elec data](https://twitter.com/dataknut/status/1466712963222540289?s=20), dwelling characteristics and pet ownership (but no species detail :-)
+
+# What do we find?
+
+Well, in some places there seem to be a lot of estimated cats...
+
+```{r maxCats}
+pc_district[nElecMeters > 0, mean_Cats := EstimatedCatPopulation / nElecMeters] # cats per electricity meter; left NA (not Inf/NaN) where there are no meters
+head(pc_district[order(-mean_Cats), .(PostcodeDistrict, EstimatedCatPopulation, mean_Cats, nPostcodes, nElecMeters)]) # rows with the most cats per meter
+```
+LL23 is on the south east corner of the [Snowdonia National Park...](https://www.google.co.uk/maps/place/Bala+LL23/@52.8953768,-3.775299,11z/data=!3m1!4b1!4m5!3m4!1s0x4865404ae1208f67:0x65a437b997c0dfb2!8m2!3d52.8825403!4d-3.6497989) while EH25 is on the outskirts of [Edinburgh](https://www.google.co.uk/maps/place/EH25/@55.8518992,-3.2076308,13z/data=!4m5!3m4!1s0x4887bf6548dd78d7:0xd6f980c5a3b93592!8m2!3d55.8560564!4d-3.1733124).
+
+Is there a correlation between estimated total cats and the number of dwellings (electricity meters)?
+
+```{r testCatsVsMeters}
+ggplot2::ggplot(pc_district, aes(x = nElecMeters, y = EstimatedCatPopulation)) + # meters (x) against estimated cats (y)
+  geom_point() + # one point per pc_district row
+  geom_smooth() # default smoother, to show the overall trend
+```
+
+Is there a correlation between estimated cat ownership and energy use?
+
+```{r testTotalGas}
+# Scatter of total_gas_kWh against the estimated cat population, one point per pc_district row.
+ggplot2::ggplot(data = pc_district, mapping = aes(x = EstimatedCatPopulation, y = total_gas_kWh)) + geom_point()
+```
+
+```{r testMeanGas}
+
+# Scatter of mean_gas_kWh against mean_Cats (estimated cats per electricity meter).
+ggplot2::ggplot(data = pc_district, mapping = aes(x = mean_Cats, y = mean_gas_kWh)) + geom_point()
+```
+```{r testTotalElec}
+# Scatter of total_elec_kWh against the estimated cat population, one point per pc_district row.
+ggplot2::ggplot(data = pc_district, mapping = aes(x = EstimatedCatPopulation, y = total_elec_kWh)) + geom_point()
+```
+```{r testMeanElec}
+# Scatter of mean_elec_kWh against mean_Cats (estimated cats per electricity meter).
+ggplot2::ggplot(data = pc_district, mapping = aes(x = mean_Cats, y = mean_elec_kWh)) + geom_point()
+```
+
+# R packages used
+
+ * bookdown [@bookdown]
+ * data.table [@data.table]
+ * ggplot2 [@ggplot2]
+ * knitr [@knitr]
+ * rmarkdown [@rmarkdown]
+ 
+# References
diff --git a/itsTheCatsStupid/makeFile.R b/itsTheCatsStupid/makeFile.R
new file mode 100644
index 0000000..7d981a0
--- /dev/null
+++ b/itsTheCatsStupid/makeFile.R
@@ -0,0 +1,17 @@
+# Render itsTheCatsStupid/<f>.Rmd to docs/<f>.html; f = report file name without path or extension.
+# title, subtitle and authors are not arguments: they are looked up outside the function (globals).
+makeReport <- function(f) {
+  rmarkdown::render(
+    input = here::here("itsTheCatsStupid", paste0(f, ".Rmd")), # default = html
+    params = list(title = title, subtitle = subtitle, authors = authors),
+    output_file = here::here("docs", paste0(f, ".html")) # no reliance on a trailing "/" surviving
+  )
+}
+
+# >> run report ----
+# makeReport() picks up title, subtitle and authors from the global environment, so set them first.
+rmdFile <- "itsTheCatsStupid" # not the full path
+title <- "#backOfaFagPacket: It's the Cats, stupid"
+subtitle <- "Does cat ownership correlate with home energy demand?"
+authors <- "Ben Anderson"
+makeReport(rmdFile)
\ No newline at end of file
-- 
GitLab