diff --git a/truncRepTest/truncRepTest.Rmd b/truncRepTest/truncRepTest.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..f76d3d8b6763d32df1592016a991eda1e1f095c6
--- /dev/null
+++ b/truncRepTest/truncRepTest.Rmd
@@ -0,0 +1,211 @@
---
title: 'Testing truncate, replicate, sample (TRS) integerisation'
subtitle: 'Testing methods for integer weighting using aggregated household electricity consumption in English MSOAs'
author: "Ben Anderson (b.anderson@soton.ac.uk), [SERG, Energy & Climate Change](http://www.energy.soton.ac.uk/), University of Southampton"
date: 'Last run at: `r Sys.time()`'
output:
  bookdown::html_document2:
    code_folding: hide
    fig_caption: yes
    number_sections: yes
    self_contained: no
    toc: yes
    toc_depth: 3
    toc_float: yes
  bookdown::pdf_document2:
    fig_caption: yes
    keep_tex: yes
    number_sections: yes
    toc: yes
    toc_depth: 2
  bookdown::word_document2:
    fig_caption: yes
    toc: yes
    toc_depth: 2
always_allow_html: yes
bibliography: '`r path.expand("~/bibliography.bib")`'
---

```{r setup, include=FALSE}
# Knitr setup ----
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE) # for final tidy run
knitr::opts_chunk$set(message = FALSE) # for final tidy run

# Set start time ----
startTime <- proc.time()

# Libraries ----
library(data.table) # cos we like data.table (you may not, in which case dplyr is fine :-)
library(lubridate)  # for date & time manipulation
library(hms)        # for hh:mm:ss if we need it
library(ggplot2)    # fancy plots
library(kableExtra) # for extra kable
library(skimr)      # for skim (data description)
library(XML)        # for parsing XML

# Parameters ----
dPath <- "~/Data" # edit for your set up

gisPath <- paste0(dPath, "/")

censusLutsPath <- paste0(dPath, "/UK_Census/Census_LUTS/2011/EngWales/OA11_LSOA11_MSOA11_LAD11_EW_LU.zip Folder")

oaCensusPath <- paste0(dPath, "/UK_Census/2011Data/EW/oa/processed")
lsoaCensusPath <- paste0(dPath, "/UK_Census/2011Data/EW/lsoa/processed")
oacPath <- paste0(dPath, "/UK OAC/2011/CDRC/Tables")
beisPath <- paste0(dPath, "/DECC/SubnationalEnergyData/2010-2018")

# residential consumption as a proportion of total consumption
consRatioThreshold <- 0.90
```


# Intro

Fractional weights are sufficient for the calculation of area level weighted statistics. However, a number of applications such as local electricity network demand modelling or agent-based transport/mobility models require household units - implying the need to integerise the weights so that specific households can be selected to fit a given area. Lovelace & Ballas introduced the 'truncate, replicate, sample' (TRS) approach, which takes fractional weights generated using iterative proportional fitting (IPF) and integerises them to give exact numbers of households per area. Their work showed that TRS produced better fitting populations than other integerisation approaches (internal validation) but did not provide external validation to demonstrate the extent to which the approach reproduces the accuracy of fractional weights when calculating area level statistics. This paper estimates the mean and median annual household electricity consumption (in kWh) for English MSOAs using both standard (fractional) IPF weights and TRS integerised weights. It then compares the results with observed MSOA level electricity consumption for areas where residential consumption is more than 90% of total electricity consumption.
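
To make the method concrete, the chunk below sketches TRS integerisation for a single area. This is a minimal illustration, not the Lovelace & Ballas implementation: the function name (`trsIntegerise`) and the toy weights are hypothetical, and it assumes the fractional IPF weights for the area sum (approximately) to the known household count.

```{r trsSketch}
# Minimal sketch of truncate, replicate, sample (TRS) integerisation for one
# area. `w` is a vector of fractional household weights from IPF, assumed to
# sum (approximately) to the known number of households in the area.
trsIntegerise <- function(w, seed = 42) {
  set.seed(seed)                        # make the sampling step reproducible
  wInt <- floor(w)                      # truncate: keep the integer part
  remainder <- w - wInt                 # fractional parts left over
  deficit <- round(sum(w)) - sum(wInt)  # households still to be allocated
  if (deficit > 0) {
    # sample the shortfall without replacement, with probability
    # proportional to the truncated (fractional) remainders
    topUp <- sample(seq_along(w), size = deficit, prob = remainder)
    wInt[topUp] <- wInt[topUp] + 1
  }
  wInt # replicate: household i appears wInt[i] times in the synthetic area
}

# toy example: four survey households, fractional weights summing to 5
trsIntegerise(c(1.8, 0.6, 2.3, 0.3))
```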

# Data prep

Various datasets can be linked at these levels:

 * OA
   * urban/rural indicator
 * LSOA
   * BEIS domestic gas/elec (annual kWh)
 * MSOA
   * BEIS domestic gas/elec (annual kWh)
   * BEIS non-domestic gas/elec (annual kWh)

## Load MSOA electricity data

We might as well work at MSOA level rather than try to aggregate the LSOA data upwards.

```{r loadBeisMSOA}
# The BEIS files share the same column names (METERS, KWH, MEAN, MEDIAN)
# so give each dataset its own column names before joining

# domestic electricity
dtMsoaDElec <- data.table::fread(paste0(beisPath, "/MSOA_DOM_ELEC_csv/MSOA_ELEC_2018.csv"))
dtMsoaDElec[, nDElecMeters := METERS]
dtMsoaDElec[, kwhDElec := KWH]
dtMsoaDElec[, kwhDMeanElec := MEAN]
dtMsoaDElec[, kwhDMedianElec := MEDIAN]

# domestic gas
dtMsoaDGas <- data.table::fread(paste0(beisPath, "/MSOA_DOM_GAS_csv/MSOA_GAS_2018.csv"))
dtMsoaDGas[, nDGasMeters := METERS]
dtMsoaDGas[, kwhDGas := KWH]
dtMsoaDGas[, kwhDMeanGas := MEAN]
dtMsoaDGas[, kwhDMedianGas := MEDIAN]

# non-domestic electricity
dtMsoaNDElec <- data.table::fread(paste0(beisPath, "/MSOA_ND_ELEC_csv/MSOA_NONDOM_ELEC_2018.csv"))
dtMsoaNDElec[, nNDElecMeters := METERS]
dtMsoaNDElec[, kwhNDElec := KWH]
dtMsoaNDElec[, kwhNDMeanElec := MEAN]
dtMsoaNDElec[, kwhNDMedianElec := MEDIAN]

# non-domestic gas
dtMsoaNDGas <- data.table::fread(paste0(beisPath, "/MSOA_ND_GAS_csv/MSOA_NONDOM_GAS_2018.csv"))
dtMsoaNDGas[, nNDGasMeters := METERS]
dtMsoaNDGas[, kwhNDGas := KWH]
dtMsoaNDGas[, kwhNDMeanGas := MEAN]
dtMsoaNDGas[, kwhNDMedianGas := MEDIAN]
```


For now we simply flag the MSOAs where most energy demand is residential - we can then use these for validation against the small area model-based estimates.

```{r linkBeisMSOA}
# key all four tables on the MSOA code so we can join them
setkey(dtMsoaDElec, MSOACode)
setkey(dtMsoaDGas, MSOACode)
setkey(dtMsoaNDElec, MSOACode)
setkey(dtMsoaNDGas, MSOACode)

# build a single MSOA 'spine' by joining the four tables
dtMsoaSpine <- dtMsoaDElec[dtMsoaDGas]   # domestic elec + domestic gas
dtMsoaSpine <- dtMsoaSpine[dtMsoaNDGas]  # + non-domestic gas
dtMsoaSpine <- dtMsoaSpine[dtMsoaNDElec] # + non-domestic elec

# domestic share of total (domestic + non-domestic) consumption
dtMsoaSpine[, elecRatio := kwhDElec/(kwhDElec + kwhNDElec)]
dtMsoaSpine[, gasRatio := kwhDGas/(kwhDGas + kwhNDGas)]

t <- dtMsoaSpine[order(-elecRatio)]
kableExtra::kable(head(t[, .(MSOAName, MSOACode,
                             elecRatio, kwhDElec, kwhNDElec,
                             gasRatio, kwhDGas, kwhNDGas)], 10),
                  caption = "Top 10 MSOAs by domestic share of electricity consumption") %>%
  kable_styling()

# Isle of Wight subset for later use
dtMsoaSpineIoW <- dtMsoaSpine[MSOAName %like% "Wight"]

# how many MSOAs exceed the residential consumption threshold?
nrow(dtMsoaSpine[elecRatio > consRatioThreshold])

# Solent sample region?
```

# Annexes

## BEIS data

### MSOA

Domestic elec

```{r skimMSOADElec}
# domestic elec
skimr::skim(dtMsoaDElec)
```

Domestic gas

```{r skimMSOADGas}
# domestic gas
skimr::skim(dtMsoaDGas)
```

Non-domestic elec

```{r skimMSOANDElec}
# non-domestic elec
skimr::skim(dtMsoaNDElec)
```

Non-domestic gas

```{r skimMSOANDGas}
# non-domestic gas
skimr::skim(dtMsoaNDGas)
```

# Runtime

```{r check runtime, include=FALSE}
t <- proc.time() - startTime
elapsed <- t[[3]]
```

Analysis completed in `r round(elapsed,2)` seconds (`r round(elapsed/60,2)` minutes) using [knitr](https://cran.r-project.org/package=knitr) in [RStudio](http://www.rstudio.com) with `r R.version.string` running on `r R.version$platform`.

# R environment

## R packages used

 * base R [@baseR]
 * bookdown [@bookdown]
 * data.table [@data.table]
 * ggplot2 [@ggplot2]
 * kableExtra [@kableExtra]
 * knitr [@knitr]
 * lubridate [@lubridate]
 * rmarkdown [@rmarkdown]
 * skimr [@skimr]
 * XML [@XML]

## Session info

```{r sessionInfo, echo=FALSE}
sessionInfo()
```

# References