Skip to content
Snippets Groups Projects
Commit bb01776a authored by Ben Anderson's avatar Ben Anderson
Browse files

Merge branch 'master' into 'master'

switched to BEIS experimental postcode level gas & electricity use data to...

See merge request !6
parents e76a1123 9eaf0036
No related branches found
No related tags found
1 merge request!6switched to BEIS experimental postcode level gas & electricity use data to...
Source diff could not be displayed: it is too large. Options to address this: view the blob.
Source diff could not be displayed: it is too large. Options to address this: view the blob.
...@@ -42,7 +42,6 @@ Numbers that could have been done on the back of one and should probably come wi ...@@ -42,7 +42,6 @@ Numbers that could have been done on the back of one and should probably come wi
```{r setup, include=FALSE} ```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE) knitr::opts_chunk$set(echo = TRUE)
library(data.table)
library(ggplot2) library(ggplot2)
``` ```
...@@ -58,52 +57,28 @@ For now we're using: ...@@ -58,52 +57,28 @@ For now we're using:
* LSOA level data on [gas](https://www.gov.uk/government/collections/sub-national-gas-consumption-data) and [electricity](https://www.gov.uk/government/collections/sub-national-electricity-consumption-data) 'consumption' at LSOA/SOA level for 2015 aggregated to postcode sectors * LSOA level data on [gas](https://www.gov.uk/government/collections/sub-national-gas-consumption-data) and [electricity](https://www.gov.uk/government/collections/sub-national-electricity-consumption-data) 'consumption' at LSOA/SOA level for 2015 aggregated to postcode sectors
* [Indices of Deprivation 2019](https://www.gov.uk/government/statistics/english-indices-of-deprivation-2019) for England * [Indices of Deprivation 2019](https://www.gov.uk/government/statistics/english-indices-of-deprivation-2019) for England
```{r loadData} ```{r loadCats}
gas_dt <- data.table::fread(paste0(dp, "/beis/subnationalGas/lsoaDom/LSOA_GAS_2015.csv.gz"))
gas_dt[, lsoa11cd := `Lower Layer Super Output Area (LSOA) Code`]
gas_dt[, mean_gas_kWh := `Mean consumption (kWh per meter)`]
gas_dt[, total_gas_kWh := `Consumption (kWh)`]
gas_dt[, nGasMeters := `Number of consuming meters`]
elec_dt <- data.table::fread(paste0(dp, "/beis/subnationalElec/lsoaDom/LSOA_ELEC_2015.csv.gz"))
elec_dt[, lsoa11cd := `Lower Layer Super Output Area (LSOA) Code`]
elec_dt[, mean_elec_kWh := `Mean domestic electricity consumption
(kWh per meter)`]
elec_dt[, total_elec_kWh := `Total domestic electricity consumption (kWh)`]
elec_dt[, nElecMeters := `Total number of domestic electricity meters`]
setkey(gas_dt, lsoa11cd)
setkey(elec_dt, lsoa11cd)
setkey(lsoa_DT, lsoa11cd)
merged_lsoa_DT <- gas_dt[, .(lsoa11cd, mean_gas_kWh, total_gas_kWh, nGasMeters)][elec_dt[, .(lsoa11cd,mean_elec_kWh,total_elec_kWh,nElecMeters)]][lsoa_DT]
# remove the record for postcodes which did not have a postcode sector
message("How many LSOAs do not map to a postcode sector?")
nrow(merged_lsoa_DT[is.na(pcd_sector)])
head(merged_lsoa_DT[is.na(pcd_sector)])
# !is.na(pcd_sector)
postcode_sector_energy <- merged_lsoa_DT[, .(nLSOAs = .N,
nPostcodes = sum(nPostcodes),
mean_gas_kWh = mean(mean_gas_kWh, na.rm = TRUE),
total_gas_kWh = sum(total_gas_kWh, na.rm = TRUE),
mean_elec_kWh = mean(mean_elec_kWh, na.rm = TRUE),
total_elec_kWh = sum(total_elec_kWh, na.rm = TRUE),
nGasMeters = sum(nGasMeters, na.rm = TRUE),
nElecMeters = sum(nElecMeters, na.rm = TRUE)), keyby = .(pcd_sector, ladnm, ladnmw)]
#head(postcode_sector_energy)
# cats # cats
cats_DT <- data.table::fread(paste0(dp, "UK_Animal and Plant Health Agency/APHA0372-Cat_Density_Postcode_District.csv")) cats_DT <- data.table::fread(paste0(dp, "UK_Animal and Plant Health Agency/APHA0372-Cat_Density_Postcode_District.csv"))
cats_DT[, pcd_sector := PostcodeDistrict] cats_DT[, pcd_sector := PostcodeDistrict]
setkey(cats_DT, pcd_sector) setkey(cats_DT, pcd_sector)
setkey(postcode_sector_energy, pcd_sector)
pc_district <- cats_DT[postcode_sector_energy] nrow(cats_DT)
setkey(pc_sector_energy_dt, pcd_sector)
nrow(pc_sector_energy_dt)
pc_district <- merge(cats_DT, pc_sector_energy_dt , by = "pcd_sector") # keeps only postcode sectors where we have cat data
nrow(pc_district)
# there are postcode sectors with no electricity meters - for now we'll remove them
# pending further investigation
summary(pc_district)
table(pc_district$GOR10NM, pc_district$rgn)
``` ```
We could also use `@SERL_UK`'s [smart meter gas/elec data](https://twitter.com/dataknut/status/1466712963222540289?s=20), dwelling characteristics and pet ownership (but no species detail :-) We could also use `@SERL_UK`'s [smart meter gas/elec data](https://twitter.com/dataknut/status/1466712963222540289?s=20), dwelling characteristics and pet ownership (but no species detail :-)
...@@ -118,53 +93,64 @@ Well, in some places there seem to be a lot of estimated cats... ...@@ -118,53 +93,64 @@ Well, in some places there seem to be a lot of estimated cats...
pc_district[, mean_Cats := EstimatedCatPopulation/nElecMeters] pc_district[, mean_Cats := EstimatedCatPopulation/nElecMeters]
head(pc_district[, .(PostcodeDistrict, EstimatedCatPopulation, mean_Cats, nPostcodes, nElecMeters)][order(-mean_Cats)]) head(pc_district[, .(PostcodeDistrict, EstimatedCatPopulation, mean_Cats, nPostcodes, nElecMeters)][order(-mean_Cats)])
``` ```
LL23 is on the south east corner of the [Snowdonia National Park...](https://www.google.co.uk/maps/place/Bala+LL23/@52.8953768,-3.775299,11z/data=!3m1!4b1!4m5!3m4!1s0x4865404ae1208f67:0x65a437b997c0dfb2!8m2!3d52.8825403!4d-3.6497989) while EH25 is on the outskirts of [Edinburgh](https://www.google.co.uk/maps/place/EH25/@55.8518992,-3.2076308,13z/data=!4m5!3m4!1s0x4887bf6548dd78d7:0xd6f980c5a3b93592!8m2!3d55.8560564!4d-3.1733124). SA63 is in south west [Wales](https://www.google.co.uk/maps/place/Clarbeston+Road+SA63/@51.8852685,-4.9147384,12z/data=!3m1!4b1!4m5!3m4!1s0x4868d5805b12efe5:0xca42ee4bc84a2f77!8m2!3d51.8900045!4d-4.8502065) while LL23 is on the edge of the [Snowdonia National Park](https://www.google.co.uk/maps/place/Bala+LL23/@52.8953768,-3.7752989,11z/data=!3m1!4b1!4m5!3m4!1s0x4865404ae1208f67:0x65a437b997c0dfb2!8m2!3d52.8825403!4d-3.6497989)....
## More dwellings, more cats? ## More dwellings, more cats?
Is there a correlation between estimated total cats and the number of dwellings (electricity meters)? Is there a correlation between estimated total cats and the number of dwellings (electricity meters)?
```{r testTotalElecMeters} ```{r testTotalElecMeters}
ggplot2::ggplot(pc_district, aes(x = nElecMeters , y = EstimatedCatPopulation)) + ggplot2::ggplot(pc_district, aes(x = nElecMeters , y = EstimatedCatPopulation,
colour = GOR10NM)) +
geom_point() + geom_point() +
geom_smooth() geom_smooth()
``` ```
# More cats, more gas?
Is there a correlation between estimated cat ownership and total gas use? Is there a correlation between estimated cat ownership and total gas use?
```{r testTotalGas} ```{r testTotalGas}
ggplot2::ggplot(pc_district, aes(x = EstimatedCatPopulation, y = total_gas_kWh)) + ggplot2::ggplot(pc_district, aes(x = EstimatedCatPopulation, y = total_gas_kWh,
colour = GOR10NM)) +
geom_point() geom_point()
``` ```
Or mean gas use and mean cats? Or mean gas use and mean cats?
```{r testMeanGas} ```{r testMeanGas}
ggplot2::ggplot(pc_district, aes(x = mean_Cats, y = mean_gas_kWh)) + pc_district[, mean_gas_kWh := total_gas_kWh/nGasMeters]
ggplot2::ggplot(pc_district, aes(x = mean_Cats, y = mean_gas_kWh, colour = GOR10NM)) +
geom_point() geom_point()
``` ```
# More cats, more electricity?
Or total electricity use and cats? Or total electricity use and cats?
```{r testTotalElec} ```{r testTotalElec}
ggplot2::ggplot(pc_district, aes(x = EstimatedCatPopulation, y = total_elec_kWh)) + ggplot2::ggplot(pc_district, aes(x = EstimatedCatPopulation, y = total_elec_kWh, colour = GOR10NM)) +
geom_point() geom_point()
``` ```
Or mean elec use and mean cats? Or mean elec use and mean cats?
```{r testMeanElec} ```{r testMeanElec}
# Mean electricity use per electricity meter: divide by nElecMeters (not nGasMeters), mirroring how mean_gas_kWh is total_gas_kWh/nGasMeters
ggplot2::ggplot(pc_district, aes(x = mean_Cats, y = mean_elec_kWh)) + pc_district[, mean_elec_kWh := total_elec_kWh/nElecMeters]
ggplot2::ggplot(pc_district, aes(x = mean_Cats, y = mean_elec_kWh, colour = GOR10NM)) +
geom_point() geom_point()
``` ```
# More cats, more energy?
Or total energy use and total cats? Or total energy use and total cats?
```{r testTotalEnergy} ```{r testTotalEnergy}
pc_district[, total_energy_kWh := total_gas_kWh + total_elec_kWh] pc_district[, total_energy_kWh := total_gas_kWh + total_elec_kWh]
ggplot2::ggplot(pc_district, aes(x = EstimatedCatPopulation, y = total_energy_kWh)) + ggplot2::ggplot(pc_district, aes(x = EstimatedCatPopulation, y = total_energy_kWh, colour = GOR10NM)) +
geom_point() geom_point() +
geom_smooth()
``` ```
Well, there may be something in there? Let's try a boxplot by cat deciles... Figure \@ref(fig:catDeciles) Well, there may be something in there? Let's try a boxplot by cat deciles... Figure \@ref(fig:catDeciles)
...@@ -174,8 +160,10 @@ pc_district[, cat_decile := dplyr::ntile(EstimatedCatPopulation, 10)] ...@@ -174,8 +160,10 @@ pc_district[, cat_decile := dplyr::ntile(EstimatedCatPopulation, 10)]
#head(pc_district[is.na(cat_decile)]) #head(pc_district[is.na(cat_decile)])
ggplot2::ggplot(pc_district[!is.na(cat_decile)], aes(x = as.factor(cat_decile), y = total_energy_kWh/1000000)) + ggplot2::ggplot(pc_district[!is.na(cat_decile)], aes(x = as.factor(cat_decile), y = total_energy_kWh/1000000)) +
geom_boxplot() + geom_boxplot() +
facet_wrap(. ~ GOR10NM) +
labs(x = "Cat ownership deciles", labs(x = "Cat ownership deciles",
y = "Total domestic electricity & gas GWh") y = "Total domestic electricity & gas GWh",
caption = "Postcode sectors (Data: BEIS & Animal and Plant Health Agency, 2015)")
``` ```
Well... Well...
......
# loads the data and runs the Rmd render
# Packages ----
library(data.table)
library(here)
# Functions ----
makeReport <- function(f){ makeReport <- function(f){
# default = html # default = html
rmarkdown::render(input = paste0(here::here("itsTheCatsStupid", f), ".Rmd"), rmarkdown::render(input = paste0(here::here("itsTheCatsStupid", f), ".Rmd"),
...@@ -8,17 +15,42 @@ makeReport <- function(f){ ...@@ -8,17 +15,42 @@ makeReport <- function(f){
) )
} }
# >> run report ---- # Set data path ----
dp <- "~/Dropbox/data/"
# Run report ----
#> define yaml ----
rmdFile <- "itsTheCatsStupid" # not the full path rmdFile <- "itsTheCatsStupid" # not the full path
title = "#backOfaFagPacket: Its the Cats, stupid" title = "#backOfaFagPacket: Its the Cats, stupid"
subtitle = "Does cat ownership correlate with home energy demand?" subtitle = "Does cat ownership correlate with home energy demand?"
authors = "Ben Anderson" authors = "Ben Anderson"
# load the postcode data here (slow) #> load the postcode data here (slow)
dp <- "~/Dropbox/data/"
postcodes_dt <- data.table::fread(paste0(dp, "UK_postcodes/PCD_OA_LSOA_MSOA_LAD_AUG20_UK_LU.csv.gz")) postcodes_elec_dt <- data.table::fread(paste0(dp, "beis/subnationalElec/Postcode_level_all_meters_electricity_2015.csv"))
postcodes_dt[, pcd_sector := tstrsplit(pcds, " ", keep = c(1))] postcodes_elec_dt[, pcd_sector := data.table::tstrsplit(POSTCODE, " ", keep = c(1))]
lsoa_DT <- postcodes_dt[, .(nPostcodes = .N), keyby = .(lsoa11cd, pcd_sector, ladnm, ladnmw)] pc_sector_elec_dt <- postcodes_elec_dt[, .(nPostcodes = .N,
total_elec_kWh = sum(`Consumption (kWh)`),
nElecMeters = sum(`Number of meters`)
), keyby = .(pcd_sector)]
nrow(pc_sector_elec_dt)
# Gas: read the postcode-level gas file found under the data path `dp`
postcodes_gas_dt <- data.table::fread(paste0(dp, "beis/subnationalGas/Experimental_Gas_Postcode_Statistics_2015.csv"))
# pcd_sector = the part of POSTCODE before the first space
postcodes_gas_dt[, pcd_sector := data.table::tstrsplit(POSTCODE, " ", keep = c(1))]
# Sum consumption and meter counts within each pcd_sector (keyby also sorts and keys the result)
pc_sector_gas_dt <- postcodes_gas_dt[, .(total_gas_kWh = sum(`Consumption (kWh)`),
nGasMeters = sum(`Number of meters`)), keyby = .(pcd_sector)]
# Printed as a quick check of how many pcd_sector rows the gas data gives
nrow(pc_sector_gas_dt)
# Key both aggregated tables on pcd_sector ready for the keyed join below
setkey(pc_sector_elec_dt, pcd_sector)
setkey(pc_sector_gas_dt, pcd_sector)
# Keyed X[Y] join: one result row per electricity row, with the gas columns
# attached where pcd_sector matches (NA where the gas table has no match)
pc_sector_energy_dt <- pc_sector_gas_dt[pc_sector_elec_dt]
# Region lookup read from data/postcode_sectors_dt.csv in the project
pc_sector_region_dt <- data.table::fread(here::here("data", "postcode_sectors_dt.csv"))
setkey(pc_sector_region_dt, pcd_sector)
# Attach the lookup columns to the energy table, again joining on pcd_sector
# NOTE(review): if the lookup holds more than one row per pcd_sector this join
# will repeat energy rows - confirm against the lookup file
pc_sector_energy_dt <- pc_sector_region_dt[pc_sector_energy_dt]
# re-run report here #> re-run report here ----
makeReport(rmdFile) makeReport(rmdFile)
\ No newline at end of file
# Builds the pcd_sector -> region lookup and writes it to
# data/postcode_sectors_dt.csv in the project.
# Every data.table function is namespace-qualified (including setkey) so the
# script does not depend on data.table having been attached with library().

# Read the NSPL postcode file
postcodes <- data.table::fread("~/Dropbox/data/UK_postcodes/NSPL_AUG_2020_UK/Data/NSPL_AUG_2020_UK.csv.gz")
# pcd_sector = the part of pcds before the first space
postcodes[, pcd_sector := data.table::tstrsplit(pcds, " ", keep = c(1))]
# Count postcodes for each (pcd_sector, rgn) combination
# NOTE(review): a pcd_sector listed under more than one rgn produces more than
# one row here - confirm that downstream joins on pcd_sector expect this
pc_sectors_dt <- postcodes[, .(nPostcodes = .N), keyby = .(pcd_sector, rgn)]
# Copy rgn into GOR10CD so it can be used as the join key to the region names
pc_sectors_dt[, GOR10CD := rgn]
# Region names and codes come from the spreadsheet shipped with the NSPL
region_codes <- readxl::read_xlsx("~/Dropbox/data/UK_postcodes/NSPL_AUG_2020_UK/Documents/Region names and codes EN as at 12_10 (GOR).xlsx")
region_code_dt <- data.table::as.data.table(region_codes)
data.table::setkey(region_code_dt, GOR10CD)
data.table::setkey(pc_sectors_dt, GOR10CD)
# Keyed join: one row per pc_sectors_dt row with the region columns attached
dt <- region_code_dt[pc_sectors_dt]
data.table::fwrite(dt, file = here::here("data", "postcode_sectors_dt.csv"))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment