From 657c2baf727bfa35661693708b4bb1342b77f149 Mon Sep 17 00:00:00 2001 From: Ben Anderson <dataknut@icloud.com> Date: Wed, 11 Nov 2020 18:50:18 +0000 Subject: [PATCH] restructured and updated, saves out final EPC data with linked geocodes --- EPCsAndCarbon/epcChecks.Rmd | 325 +++-- docs/epcChecks.html | 2421 +++++++++++++++++++---------------- 2 files changed, 1540 insertions(+), 1206 deletions(-) diff --git a/EPCsAndCarbon/epcChecks.Rmd b/EPCsAndCarbon/epcChecks.Rmd index 8bd0a3a..e8a1b9c 100644 --- a/EPCsAndCarbon/epcChecks.Rmd +++ b/EPCsAndCarbon/epcChecks.Rmd @@ -55,14 +55,15 @@ We have to assume the data we have is the _current state of play_ for these dwel # Data loading +## EPCs Load the data for the area of interest - in this case the City of Southampton. ```{r, loadSoton} df <- path.expand("~/data/EW_epc/domestic-E06000045-Southampton/certificates.csv") -allEPCs_DT <- data.table::fread(df) +sotonEPCsDT <- data.table::fread(df) ``` -The EPC data file has `r nrow(allEPCs_DT)` records for Southampton and `r ncol(allEPCs_DT)` variables. We're not interested in all of these, we want: +The EPC data file has `r nrow(sotonEPCsDT)` records for Southampton and `r ncol(sotonEPCsDT)` variables. We're not interested in all of these, we want: * PROPERTY_TYPE: Describes the type of property such as House, Flat, Maisonette etc. This is the type differentiator for dwellings; * BUILT_FORM: The building type of the Property e.g. Detached, Semi-Detached, Terrace etc. Together with the Property Type, the Build Form produces a structured description of the property; @@ -82,15 +83,15 @@ The EPC data file has `r nrow(allEPCs_DT)` records for Southampton and `r ncol(a These may indicate 'non-grid' energy inputs. -## Select most recent records +### Select most recent records If an EPC has been updated or refreshed, the EPC dataset will hold multiple EPC records for that property (see Table \@ref(tab:plotAllRecords)). 
```{r, plotAllRecords, fig.cap="All records: Inspection date"} -ggplot2::ggplot(allEPCs_DT, aes(x = INSPECTION_DATE)) + +ggplot2::ggplot(sotonEPCsDT, aes(x = INSPECTION_DATE)) + geom_histogram() -t <- allEPCs_DT[, .(nRecords = .N, +t <- sotonEPCsDT[, .(nRecords = .N, firstDate = min(INSPECTION_DATE), lastDate = max(INSPECTION_DATE)), keyby = .(BUILDING_REFERENCE_NUMBER)] @@ -100,7 +101,7 @@ Figure \@ref(fig:plotAllRecords) shows the inspection date of all EPC records. W ```{r, checkData} # select just these vars -dt <- allEPCs_DT[, .(BUILDING_REFERENCE_NUMBER, LMK_KEY, LODGEMENT_DATE,INSPECTION_DATE, PROPERTY_TYPE, BUILT_FORM, +dt <- sotonEPCsDT[, .(BUILDING_REFERENCE_NUMBER, LMK_KEY, LODGEMENT_DATE,INSPECTION_DATE, PROPERTY_TYPE, BUILT_FORM, ENVIRONMENT_IMPACT_CURRENT, ENERGY_CONSUMPTION_CURRENT, CO2_EMISSIONS_CURRENT, TENURE, PHOTO_SUPPLY, WIND_TURBINE_COUNT, TOTAL_FLOOR_AREA, POSTCODE, LOCAL_AUTHORITY_LABEL)] @@ -129,7 +130,7 @@ ggplot2::ggplot(sotonUniqueEPCsDT, aes(x = INSPECTION_DATE)) + geom_histogram() ``` -## Descriptives +### Descriptives Now check the distributions of the retained variables. @@ -145,9 +146,127 @@ As we can see that we have `r uniqueN(dt$BUILDING_REFERENCE_NUMBER)` unique prop This is not surprising since the kWh/y and TCO2/y values are estimated using a model but before we go any further we'd better check if these are significant in number. -# EPC data checks +## Postcode data -## Check ENERGY_CONSUMPTION_CURRENT +Load postcodes for Southampton (contains other geo-codes for linkage). + +Source: https://geoportal.statistics.gov.uk/datasets/national-statistics-postcode-lookup-august-2020 + +```{r, loadPostcodes} + +# Load the postcode based MSOA codes +soPostcodesDT <- data.table::fread(path.expand("~/data/UK_postcodes/NSPL_AUG_2020_UK/Data/multi_csv/NSPL_AUG_2020_UK_SO.csv")) + +#soPostcodesDT <- soPostcodesDT[is.na(doterm)] # keep current +# keep all as some of the defunct ones will be in the EPC data (!) 
+ +sotonPostcodesDT <- soPostcodesDT[laua == "E06000045"] # keep Southampton City + +sotonPostcodesReducedDT <- sotonPostcodesDT[, .(pcd, pcd2, pcds, laua, msoa11, lsoa11)] + +message("Example data") +head(sotonPostcodesReducedDT) +``` + +## BEIS data + +Load BEIS energy demand data. + +Source: https://geoportal.statistics.gov.uk/datasets/national-statistics-postcode-lookup-august-2020 + +```{r, loadBEIS} +beisElecDT <- data.table::fread("~/data/beis/MSOA_DOM_ELEC_csv/MSOA_ELEC_2018.csv") +sotonElecDT <- beisElecDT[LAName %like% "Southampton", .(nElecMeters = METERS, + beisElecMWh = KWH/1000, + MSOACode, LAName) + ] + + +beisGasDT <- data.table::fread("~/data/beis/MSOA_DOM_GAS_csv/MSOA_GAS_2018.csv") +sotonGasDT <- beisGasDT[LAName %like% "Southampton", .(nGasMeters = METERS, + beisGasMWh = KWH/1000, + MSOACode)] + +setkey(sotonElecDT, MSOACode) +setkey(sotonGasDT, MSOACode) +sotonEnergyDT <- sotonGasDT[sotonElecDT] +sotonEnergyDT[, beisEnergyMWh := beisElecMWh + beisGasMWh] +#head(sotonEnergyDT) +message("Example data (retained variables)") +head(sotonEnergyDT) +``` + +## Census data + +Load Census 2011 tenure data. 
+ +Source: https://www.nomisweb.co.uk/census/2011/ks402ew + +```{r, loadCensus} +# census tenure ---- +dt <- data.table::fread(path.expand("~/data/census2011/2011_MSOA_householdTenure_Soton.csv")) + +dt[, census2011_socialRent := `Tenure: Social rented; measures: Value`] +dt[, census2011_privateRent := `Tenure: Private rented; measures: Value`] +dt[, census2011_ownerOccupy := `Tenure: Owned; measures: Value`] +dt[, census2011_other := `Tenure: Living rent free; measures: Value`] +dt[, MSOACode := `geography code`] + +dt[, hhCheck := census2011_socialRent + census2011_privateRent + census2011_ownerOccupy + census2011_other] +dt[, nHHs_tenure := `Tenure: All households; measures: Value`] + +dt[, socRent_pc := 100*(census2011_socialRent/nHHs_tenure)] +dt[, privRent_pc := 100*(census2011_privateRent/nHHs_tenure)] +dt[, ownerOcc_pc := 100*(census2011_ownerOccupy/nHHs_tenure)] + +tenureDT <- dt[, .(MSOACode, nHHs_tenure, socRent_pc, privRent_pc, ownerOcc_pc)] +message("Example data (retained variables)") +head(tenureDT) # all tenure data +``` + +## Deprivation data + +Load IMD data. 
+ +Source: https://www.nomisweb.co.uk/census/2011/qs119ew + +```{r, loadDeprivation} + + +# add the deprivation data by MSOA +dt <- data.table::fread(path.expand("~/data/census2011/2011_MSOA_deprivation.csv")) +dt[, nHHs_deprivation := `Household Deprivation: All categories: Classification of household deprivation; measures: Value`] +dt[, MSOACode := `geography code`] + +#sotonDep_DT[, .(nHouseholds = sum(totalHouseholds)), keyby = .(LAName)] + +dt[, dep0_pc := 100*(`Household Deprivation: Household is not deprived in any dimension; measures: Value`/nHHs_deprivation)] +dt[, dep1_pc := 100*(`Household Deprivation: Household is deprived in 1 dimension; measures: Value`/nHHs_deprivation)] +dt[, dep2_pc := 100*(`Household Deprivation: Household is deprived in 2 dimensions; measures: Value`/nHHs_deprivation)] +dt[, dep3_pc := 100*(`Household Deprivation: Household is deprived in 3 dimensions; measures: Value`/nHHs_deprivation)] +dt[, dep4_pc := 100*(`Household Deprivation: Household is deprived in 4 dimensions; measures: Value`/nHHs_deprivation)] + +deprivationDT <- dt[, .(MSOACode, nHHs_deprivation, dep0_pc, dep1_pc, dep2_pc, dep3_pc, dep4_pc)] +# sneak the LA name in there too +dt <- sotonEnergyDT[,.(MSOACode,LAName)] +setkey(dt, MSOACode) +setkey(deprivationDT, MSOACode) + +sotonDeprivationDT <- deprivationDT[dt] # has the side effect of dropping non-Soton MSOAs + +message("Example data (retained variables)") +head(sotonDeprivationDT) + +# merge with census for future use +setkey(sotonDeprivationDT, MSOACode) +setkey(tenureDT, MSOACode) +sotonCensus2011_DT <- tenureDT[sotonDeprivationDT] # only Soton MSOAs + +``` + +# Data checks + +## EPC: Check ENERGY_CONSUMPTION_CURRENT We recode the current energy consumption into categories for comparison with other low values and the presence of wind turbines/PV. We use -ve, 0 and 1 kWh as the thresholds of interest. @@ -198,7 +317,7 @@ ggplot2::ggplot(sotonUniqueEPCsDT[TENURE != "NO DATA!" 
& theme(legend.position = "bottom") ``` -## Check CO2_EMISSIONS_CURRENT +## EPC: Check CO2_EMISSIONS_CURRENT Next we do the same for current emissions. Repeat the coding for total floor area using 0 and 1 TCO2/y as the threshold of interest. @@ -231,12 +350,12 @@ kableExtra::kable(round(100*(prop.table(table(sotonUniqueEPCsDT$emissionsFlag, There are `r nZeroEmissions` properties with 0 or negative emissions. It looks like they are also the properties with -ve kWh as we might expect. So we can safely ignore them. -## Check ENVIRONMENT_IMPACT_CURRENT +## EPC: Check ENVIRONMENT_IMPACT_CURRENT `Environmental impact` should decrease as emissions increase. ```{r, checkImpact, fig.cap="Histogram of ENVIRONMENT_IMPACT_CURRENT"} -ggplot2::ggplot(allEPCs_DT, aes(x = ENVIRONMENT_IMPACT_CURRENT)) + +ggplot2::ggplot(sotonEPCsDT, aes(x = ENVIRONMENT_IMPACT_CURRENT)) + geom_histogram() ``` @@ -244,7 +363,7 @@ So what is the relationship between ENVIRONMENT_IMPACT_CURRENT and CO2_EMISSIONS ```{r, checkEmissionsImpact, fig.cap="Plot of ENVIRONMENT_IMPACT_CURRENT vs CO2_EMISSIONS_CURRENT"} -ggplot2::ggplot(allEPCs_DT, aes(x = CO2_EMISSIONS_CURRENT, +ggplot2::ggplot(sotonEPCsDT, aes(x = CO2_EMISSIONS_CURRENT, y = ENVIRONMENT_IMPACT_CURRENT, colour = TENURE)) + geom_point() + @@ -252,7 +371,7 @@ ggplot2::ggplot(allEPCs_DT, aes(x = CO2_EMISSIONS_CURRENT, theme(legend.position = "bottom") ``` -## Check TOTAL_FLOOR_AREA +## EPC: Check TOTAL_FLOOR_AREA Repeat the coding for total floor area using 5 m2 as the threshold of interest. 
@@ -264,8 +383,8 @@ nZeroFloorArea <- nrow(sotonUniqueEPCsDT[TOTAL_FLOOR_AREA < 0]) sotonUniqueEPCsDT[, floorFlag := ifelse(TOTAL_FLOOR_AREA == 0, "0 m2", NA)] sotonUniqueEPCsDT[, floorFlag := ifelse(TOTAL_FLOOR_AREA > 0 & - TOTAL_FLOOR_AREA <= 10, "0-5 m2", floorFlag)] -sotonUniqueEPCsDT[, floorFlag := ifelse(TOTAL_FLOOR_AREA > 10, "5+ m2", floorFlag)] + TOTAL_FLOOR_AREA <= 5, "0-5 m2", floorFlag)] +sotonUniqueEPCsDT[, floorFlag := ifelse(TOTAL_FLOOR_AREA > 5, "5+ m2", floorFlag)] t <- with(sotonUniqueEPCsDT, table(floorFlag, consFlag)) @@ -287,101 +406,23 @@ Table \@ref(tab:checkEmissions) shows that the properties with floor area of < 1 The scale of the x axis also suggests a few very large properties. -## Data summary - -We have identified some issues with a small number of the properties in the EPC dataset. These are not unexpected given that much of the estimates rely on partial or presumed data. Data entry errors are also quite likely. As a result we exclude: - - * any property where ENERGY_CONSUMPTION_CURRENT <= 0 - * any property where TOTAL_FLOOR_AREA <= 5 - * any property where CO2_EMISSIONS_CURRENT <= 0 - -```{r, finalData} -finalEPCDT <- sotonUniqueEPCsDT[ENERGY_CONSUMPTION_CURRENT > 0 & - TOTAL_FLOOR_AREA > 5 & - CO2_EMISSIONS_CURRENT > 0] - -skimr::skim(finalEPCDT) -``` - -This leaves us with a total of `r prettyNum(nrow(finalEPCDT), big.mark = ",")` properties. - -```{r, saveFinalData} -of <- path.expand("~/data/EW_epc/domestic-E06000045-Southampton/finalClean.csv") -data.table::fwrite(finalEPCDT, file = of) - -message("Gziping ", of) -# Gzip it -# in case it fails (it will on windows - you will be left with a .csv file) -try(system( paste0("gzip -f '", of,"'"))) # include ' or it breaks on spaces -message("Gzipped ", of) - -``` - - -# Check 'missing' EPC rates +## EPC: Check 'missing' EPC rates We know that we do not have EPC records for every dwelling. But how many are we missing? 
We will check this at MSOA level as it allows us to link to other MSOA level datasets that tell us how many households, dwellings or energy meters to expect. Arguably it would be better to do this at LSOA level but... -First we'll use the BEIS 2018 MSOA level annual electricity data to estimate the number of meters (not properties) - some addresses can have 2 meters (e.g. standard & economy 7). This is more useful than the number of gas meters since not all dwellings have mains gas but all have an electricity meter. +First we'll use the BEIS 2018 MSOA level annual electricity data to estimate the number of meters (not properties) - some addresses can have 2 meters (e.g. standard & economy 7). However this is more useful than the number of gas meters since not all dwellings have mains gas but all (should?) have an electricity meter. -```{r, checkBEIS} -beisElecDT <- data.table::fread("~/data/beis/MSOA_DOM_ELEC_csv/MSOA_ELEC_2018.csv") -sotonElecDT <- beisElecDT[LAName %like% "Southampton", .(nElecMeters = METERS, - beisElecMWh = KWH/1000, - MSOACode, LAName) - ] - - -beisGasDT <- data.table::fread("~/data/beis/MSOA_DOM_GAS_csv/MSOA_GAS_2018.csv") -sotonGasDT <- beisGasDT[LAName %like% "Southampton", .(nGasMeters = METERS, - beisGasMWh = KWH/1000, - MSOACode)] - -setkey(sotonElecDT, MSOACode) -setkey(sotonGasDT, MSOACode) -sotonEnergyDT <- sotonGasDT[sotonElecDT] -sotonEnergyDT[, beisEnergyMWh := beisElecMWh + beisGasMWh] -#head(sotonEnergyDT) +```{r, checkBEISmeters} +sotonEnergyDT[, .(nElecMeters = sum(nElecMeters), + nGasMeters = sum(nGasMeters)), keyby = .(LAName)] ``` Next we'll check for the number of households reported by the 2011 Census. 
-> would be better to use dwellings but this gives us tenure +> would be better to use dwellings but this gives us tenure as well ```{r, checkCensus} #censusDT <- data.table::fread(path.expand("~/data/")) -# IMD ---- -deprivationDT <- data.table::fread(path.expand("~/data/census2011/2011_MSOA_deprivation.csv")) -deprivationDT[, totalHouseholds := `Household Deprivation: All categories: Classification of household deprivation; measures: Value`] -deprivationDT[, MSOACode := `geography code`] -setkey(deprivationDT, MSOACode) -setkey(sotonElecDT, MSOACode) -# link LA name from Soton elec for now -sotonDep_DT <- deprivationDT[sotonElecDT[, .(MSOACode, LAName)]] -sotonDep_DT[, nHHs_deprivation := `Household Deprivation: All categories: Classification of household deprivation; measures: Value`] - -#sotonDep_DT[, .(nHouseholds = sum(totalHouseholds)), keyby = .(LAName)] - -# census tenure ---- -sotonTenureDT <- data.table::fread(path.expand("~/data/census2011/2011_MSOA_householdTenure_Soton.csv")) - -sotonTenureDT[, census2011_socialRent := `Tenure: Social rented; measures: Value`] -sotonTenureDT[, census2011_privateRent := `Tenure: Private rented; measures: Value`] -sotonTenureDT[, census2011_ownerOccupy := `Tenure: Owned; measures: Value`] -sotonTenureDT[, census2011_other := `Tenure: Living rent free; measures: Value`] -sotonTenureDT[, MSOACode := `geography code`] - -sotonTenureDT[, hhCheck := census2011_socialRent + census2011_privateRent + census2011_ownerOccupy + census2011_other] -sotonTenureDT[, nHHs_tenure := `Tenure: All households; measures: Value`] - -# summary(sotonTenureDT[, .(hhCheck, nHHs_tenure)]) -# might not quite match due to cell perturbation etc? 
- -# join em ---- -setkey(sotonDep_DT, MSOACode) -setkey(sotonTenureDT, MSOACode) - -sotonCensus2011_DT <- sotonTenureDT[sotonDep_DT] t <- sotonCensus2011_DT[, .(sum_Deprivation = sum(nHHs_deprivation), sum_Tenure = sum(nHHs_tenure)), keyby = .(LAName)] @@ -393,13 +434,6 @@ That's lower (as expected) but doesn't allow for dwellings that were empty on ce ```{r, checkPostcodes} # Postcodes don't help - no count of addresses in the data (there used to be??) # but we can use it to check which Soton postcodes are missing from the EPC file -soPostcodesDT <- data.table::fread(path.expand("~/data/UK_postcodes/NSPL_AUG_2020_UK/Data/multi_csv/NSPL_AUG_2020_UK_SO.csv")) - -soPostcodesDT <- soPostcodesDT[is.na(doterm)] # keep current - -sotonPostcodesDT <- soPostcodesDT[laua == "E06000045"] # keep Southampton City - -sotonPostcodesReducedDT <- sotonPostcodesDT[, .(pcd, pcd2, pcds, laua, msoa11, lsoa11)] sotonPostcodesReducedDT[, c("pc_chunk1","pc_chunk2" ) := tstrsplit(pcds, split = " " @@ -413,13 +447,13 @@ We should not have single digit postcodes in the postcode data - i.e. S01 should # EPC # set up counters # use final cleaned EPC data -finalEPCDT[, epcIsSocialRent := ifelse(TENURE == "rental (social)", 1, 0)] -finalEPCDT[, epcIsPrivateRent := ifelse(TENURE == "rental (private)", 1, 0)] -finalEPCDT[, epcIsOwnerOcc := ifelse(TENURE == "owner-occupied", 1, 0)] -finalEPCDT[, epcIsUnknownTenure := ifelse(TENURE == "NO DATA!" | +sotonUniqueEPCsDT[, epcIsSocialRent := ifelse(TENURE == "rental (social)", 1, 0)] +sotonUniqueEPCsDT[, epcIsPrivateRent := ifelse(TENURE == "rental (private)", 1, 0)] +sotonUniqueEPCsDT[, epcIsOwnerOcc := ifelse(TENURE == "owner-occupied", 1, 0)] +sotonUniqueEPCsDT[, epcIsUnknownTenure := ifelse(TENURE == "NO DATA!" 
| TENURE == "" , 1, 0)] # aggregate EPCs to postcodes -sotonEpcPostcodes_DT <- finalEPCDT[, .(nEPCs = .N, +sotonEpcPostcodes_DT <- sotonUniqueEPCsDT[, .(nEPCs = .N, sumEPC_tCO2 = sum(CO2_EMISSIONS_CURRENT, na.rm = TRUE), n_epcIsSocialRent = sum(epcIsSocialRent, na.rm = TRUE), n_epcIsPrivateRent = sum(epcIsPrivateRent, na.rm = TRUE), @@ -435,11 +469,11 @@ sotonEpcPostcodes_DT[, c("pc_chunk1","pc_chunk2" ) := tstrsplit(POSTCODE, sotonEpcPostcodes_DT[, .(nEPCs = .N), keyby = .(pc_chunk1)] # check original EPC data for Soton - which postcodes are covered? -allEPCs_DT[, c("pc_chunk1","pc_chunk2" ) := tstrsplit(POSTCODE, +sotonEPCsDT[, c("pc_chunk1","pc_chunk2" ) := tstrsplit(POSTCODE, split = " " ) ] -allEPCs_DT[, .(nEPCs = .N), keyby = .(pc_chunk1)] +sotonEPCsDT[, .(nEPCs = .N), keyby = .(pc_chunk1)] ``` It looks like we have EPCs for each postcode sector which is good. @@ -479,7 +513,7 @@ Join the estimates together at MSOA level for comparison. There are `r uniqueN(s ```{r, joinMSOA} # 32 LSOAs in Soton -# add deprivation +# add census & deprivation to energy setkey(sotonEnergyDT, MSOACode) setkey(sotonCensus2011_DT, MSOACode) setkey(sotonEpcMSOA_DT, MSOACode) @@ -494,8 +528,8 @@ sotonMSOA_DT <- sotonEpcMSOA_DT[sotonMSOA_DT] msoaNamesDT <- data.table::as.data.table(readxl::read_xlsx(path.expand("~/data/UK_postcodes/NSPL_AUG_2020_UK/Documents/MSOA (2011) names and codes UK as at 12_12.xlsx"))) msoaNamesDT[, MSOACode := MSOA11CD] msoaNamesDT[, MSOAName := MSOA11NM] -setkey(msoaNamesDT, MSOACode) +setkey(msoaNamesDT, MSOACode) sotonMSOA_DT <- msoaNamesDT[sotonMSOA_DT] #names(sotonMSOA_DT) @@ -530,11 +564,6 @@ We can also see that despite having 'missing' EPCs, the estimated total EPC-deri ```{r, missingEPCbyMSOA, fig.cap="% 'missing' rates comparison"} -sotonMSOA_DT[, dep0_pc := 100*(`Household Deprivation: Household is not deprived in any dimension; measures: Value`/nHHs_deprivation)] -sotonMSOA_DT[, socRent_pc := 100*(census2011_socialRent/nHHs_tenure)] 
-sotonMSOA_DT[, privRent_pc := 100*(census2011_privateRent/nHHs_tenure)] -sotonMSOA_DT[, ownerOcc_pc := 100*(census2011_ownerOccupy/nHHs_tenure)] - t <- sotonMSOA_DT[, .(MSOAName, MSOACode, nHHs_tenure,nElecMeters,nEPCs, dep0_pc, socRent_pc, privRent_pc, ownerOcc_pc,sumEpcMWh, beisEnergyMWh )] @@ -585,7 +614,7 @@ outlier <- t[sumEpcMWh > 70000] Figure \@ref(fig:energyMSOAPlot) shows that both of these are true. MSOAs with a high proportion of owner occupiers (and therefore more likely to have missing EPCs) tend to have higher observed energy demand than the EOC data suggests - they are above the reference line. MSOAs with a lower proportion of owner occupiers (and therefore more likely to have more complete EPC coverage) tend to be on or below the line. As before we have the same notable outlier (`r outlier$MSOACode`) and for the same reasons... In this case this produces a much higher energy demand estimate than the BEIS 2018 data records. -# Check BEIS data +## BEIS: Check data While we're here we'll also check the BEIS data. Table \@ref(tab:beisDesc) shows the five highest and lowest MSOAs by annual electricity use. @@ -604,7 +633,57 @@ kableExtra::kable(t2, caption = "Southampton MSOAs: BEIS 2018 energy data ordere ``` -# Save MSOA aggregates for re-use +# Summarise and save EPC data for re-use + +We have identified some issues with a small number of the properties in the EPC dataset. These are not unexpected given that much of the estimates rely on partial or presumed data. Data entry errors are also quite likely. As a result we exclude: + + * any property where ENERGY_CONSUMPTION_CURRENT <= 0 + * any property where TOTAL_FLOOR_AREA <= 5 + * any property where CO2_EMISSIONS_CURRENT <= 0 + +```{r, finalData} +finalEPCDT <- sotonUniqueEPCsDT[ENERGY_CONSUMPTION_CURRENT > 0 & + TOTAL_FLOOR_AREA > 5 & + CO2_EMISSIONS_CURRENT > 0] + +skimr::skim(finalEPCDT) +``` + +This leaves us with a total of `r prettyNum(nrow(finalEPCDT), big.mark = ",")` properties. 
+ +```{r, saveFinalData} +library(stringr) +finalEPCDT[, POSTCODE_s := stringr::str_remove_all(POSTCODE, " ")] +sotonPostcodesReducedDT[, POSTCODE_s := stringr::str_remove_all(pcds, " ")] +setkey(finalEPCDT, POSTCODE_s) +setkey(sotonPostcodesReducedDT, POSTCODE_s) +dt <- sotonPostcodesReducedDT[finalEPCDT] +dt[, MSOACode := msoa11] + +setkey(dt, MSOACode) +setkey(sotonCensus2011_DT, MSOACode) + +dt <- sotonCensus2011_DT[dt] + +of <- path.expand("~/data/EW_epc/domestic-E06000045-Southampton/EPCs_liveFinalClean.csv") +data.table::fwrite(dt, file = of) + +message("Gziping ", of) +# Gzip it +# in case it fails (it will on windows - you will be left with a .csv file) +try(system( paste0("gzip -f '", of,"'"))) # include ' or it breaks on spaces +message("Gzipped ", of) + +``` + +NB: this failed to match an EPC postcode to an MSOA for `r nrow(dt[is.na(MSOACode)])` EPCs The table below shows which postcodes these were by date. + +```{r, nonMatches} +dt[is.na(MSOACode), .(nEPCs = .N), keyby = .(POSTCODE_s, TENURE, INSPECTION_DATE)] +``` + + +# Summarise and save MSOA aggregates for re-use Finally we save the MSOA table into the repo data directory for future use. We don't usually advocate keeping data in a git repo but this is small, aggregated and [mostly harmless](https://en.wikipedia.org/wiki/Mostly_Harmless). 
diff --git a/docs/epcChecks.html b/docs/epcChecks.html index 80f2799..7272c2a 100644 --- a/docs/epcChecks.html +++ b/docs/epcChecks.html @@ -1855,7 +1855,7 @@ div.tocify { <h1 class="title toc-ignore">Checking EPC datasets for Southampton</h1> <h3 class="subtitle">Data cleaning, outlier checks and coverage analysis</h3> <h4 class="author">Ben Anderson</h4> -<h4 class="date">Last run at: 2020-11-10 17:31:09</h4> +<h4 class="date">Last run at: 2020-11-11 17:02:14</h4> </div> @@ -1876,13 +1876,11 @@ div.tocify { </div> <div id="data-loading" class="section level1"> <h1><span class="header-section-number">2</span> Data loading</h1> +<div id="epcs" class="section level2"> +<h2><span class="header-section-number">2.1</span> EPCs</h2> <p>Load the data for the area of interest - in this case the City of Southampton.</p> <pre class="r"><code>df <- path.expand("~/data/EW_epc/domestic-E06000045-Southampton/certificates.csv") -allEPCs_DT <- data.table::fread(df)</code></pre> -<pre><code>## Warning in require_bit64_if_needed(ans): Some columns are type 'integer64' but package bit64 is not -## installed. Those columns will print as strange looking floating point data. There is no need to reload -## the data. Simply install.packages('bit64') to obtain the integer64 print method and print the data -## again.</code></pre> +sotonEPCsDT <- data.table::fread(df)</code></pre> <p>The EPC data file has 91833 records for Southampton and 90 variables. We’re not interested in all of these, we want:</p> <ul> <li>PROPERTY_TYPE: Describes the type of property such as House, Flat, Maisonette etc. 
This is the type differentiator for dwellings;</li> @@ -1902,10 +1900,10 @@ allEPCs_DT <- data.table::fread(df)</code></pre> <li>INSPECTION_DATE - so we can select the most receitn</li> </ul> <p>These may indicate ‘non-grid’ energy inputs.</p> -<div id="select-most-recent-records" class="section level2"> -<h2><span class="header-section-number">2.1</span> Select most recent records</h2> +<div id="select-most-recent-records" class="section level3"> +<h3><span class="header-section-number">2.1.1</span> Select most recent records</h3> <p>If an EPC has been updated or refreshed, the EPC dataset will hold multiple EPC records for that property (see Table <a href="#tab:plotAllRecords">2.1</a>).</p> -<pre class="r"><code>ggplot2::ggplot(allEPCs_DT, aes(x = INSPECTION_DATE)) + +<pre class="r"><code>ggplot2::ggplot(sotonEPCsDT, aes(x = INSPECTION_DATE)) + geom_histogram()</code></pre> <pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre> <div class="figure"><span id="fig:plotAllRecords"></span> @@ -1914,7 +1912,7 @@ allEPCs_DT <- data.table::fread(df)</code></pre> Figure 2.1: All records: Inspection date </p> </div> -<pre class="r"><code>t <- allEPCs_DT[, .(nRecords = .N, +<pre class="r"><code>t <- sotonEPCsDT[, .(nRecords = .N, firstDate = min(INSPECTION_DATE), lastDate = max(INSPECTION_DATE)), keyby = .(BUILDING_REFERENCE_NUMBER)] @@ -1942,7 +1940,7 @@ lastDate <tbody> <tr> <td style="text-align:right;"> -0 +444 </td> <td style="text-align:right;"> 3 @@ -1956,7 +1954,7 @@ lastDate </tr> <tr> <td style="text-align:right;"> -0 +668 </td> <td style="text-align:right;"> 2 @@ -1970,7 +1968,7 @@ lastDate </tr> <tr> <td style="text-align:right;"> -0 +697 </td> <td style="text-align:right;"> 2 @@ -1984,7 +1982,7 @@ lastDate </tr> <tr> <td style="text-align:right;"> -0 +805 </td> <td style="text-align:right;"> 2 @@ -1998,7 +1996,7 @@ lastDate </tr> <tr> <td style="text-align:right;"> -0 +871 </td> <td style="text-align:right;"> 2 @@ -2012,7 
+2010,7 @@ lastDate </tr> <tr> <td style="text-align:right;"> -0 +1362 </td> <td style="text-align:right;"> 2 @@ -2028,7 +2026,7 @@ lastDate </table> <p>Figure <a href="#fig:plotAllRecords">2.1</a> shows the inspection date of all EPC records. We want to just select the most recent as we are not currently interested in change over time.</p> <pre class="r"><code># select just these vars -dt <- allEPCs_DT[, .(BUILDING_REFERENCE_NUMBER, LMK_KEY, LODGEMENT_DATE,INSPECTION_DATE, PROPERTY_TYPE, BUILT_FORM, +dt <- sotonEPCsDT[, .(BUILDING_REFERENCE_NUMBER, LMK_KEY, LODGEMENT_DATE,INSPECTION_DATE, PROPERTY_TYPE, BUILT_FORM, ENVIRONMENT_IMPACT_CURRENT, ENERGY_CONSUMPTION_CURRENT, CO2_EMISSIONS_CURRENT, TENURE, PHOTO_SUPPLY, WIND_TURBINE_COUNT, TOTAL_FLOOR_AREA, POSTCODE, LOCAL_AUTHORITY_LABEL)] @@ -2063,8 +2061,8 @@ Figure 2.2: Latest records: Inspection date </p> </div> </div> -<div id="descriptives" class="section level2"> -<h2><span class="header-section-number">2.2</span> Descriptives</h2> +<div id="descriptives" class="section level3"> +<h3><span class="header-section-number">2.1.2</span> Descriptives</h3> <p>Now check the distributions of the retained variables.</p> <pre class="r"><code>skimr::skim(sotonUniqueEPCsDT)</code></pre> <pre><code>## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. 
Falling back to @@ -2713,10 +2711,140 @@ TOTAL_FLOOR_AREA <p>This is not surprising since the kWh/y and TCO2/y values are estimated using a model but before we go any further we’d better check if these are significant in number.</p> </div> </div> -<div id="epc-data-checks" class="section level1"> -<h1><span class="header-section-number">3</span> EPC data checks</h1> -<div id="check-energy_consumption_current" class="section level2"> -<h2><span class="header-section-number">3.1</span> Check ENERGY_CONSUMPTION_CURRENT</h2> +<div id="postcode-data" class="section level2"> +<h2><span class="header-section-number">2.2</span> Postcode data</h2> +<p>Load postcodes for Southampton (contains other geo-codes for linkage).</p> +<p>Source: <a href="https://geoportal.statistics.gov.uk/datasets/national-statistics-postcode-lookup-august-2020" class="uri">https://geoportal.statistics.gov.uk/datasets/national-statistics-postcode-lookup-august-2020</a></p> +<pre class="r"><code># Load the postcode based MSOA codes +soPostcodesDT <- data.table::fread(path.expand("~/data/UK_postcodes/NSPL_AUG_2020_UK/Data/multi_csv/NSPL_AUG_2020_UK_SO.csv")) + +#soPostcodesDT <- soPostcodesDT[is.na(doterm)] # keep current +# keep all as some of the defunct ones will be in the EPC data (!) 
+ +sotonPostcodesDT <- soPostcodesDT[laua == "E06000045"] # keep Southampton City + +sotonPostcodesReducedDT <- sotonPostcodesDT[, .(pcd, pcd2, pcds, laua, msoa11, lsoa11)] + +message("Example data")</code></pre> +<pre><code>## Example data</code></pre> +<pre class="r"><code>head(sotonPostcodesReducedDT)</code></pre> +<pre><code>## pcd pcd2 pcds laua msoa11 lsoa11 +## 1: SO1 0AA SO1 0AA SO1 0AA E06000045 E02003577 E01032748 +## 2: SO1 0AB SO1 0AB SO1 0AB E06000045 E02003577 E01032748 +## 3: SO1 0AD SO1 0AD SO1 0AD E06000045 E02003577 E01032748 +## 4: SO1 0AE SO1 0AE SO1 0AE E06000045 E02003571 E01017140 +## 5: SO1 0AF SO1 0AF SO1 0AF E06000045 E02003571 E01017140 +## 6: SO1 0AG SO1 0AG SO1 0AG E06000045 E02003577 E01032748</code></pre> +</div> +<div id="beis-data" class="section level2"> +<h2><span class="header-section-number">2.3</span> BEIS data</h2> +<p>Load BEIS energy demand data.</p> +<p>Source: <a href="https://geoportal.statistics.gov.uk/datasets/national-statistics-postcode-lookup-august-2020" class="uri">https://geoportal.statistics.gov.uk/datasets/national-statistics-postcode-lookup-august-2020</a></p> +<pre class="r"><code>beisElecDT <- data.table::fread("~/data/beis/MSOA_DOM_ELEC_csv/MSOA_ELEC_2018.csv") +sotonElecDT <- beisElecDT[LAName %like% "Southampton", .(nElecMeters = METERS, + beisElecMWh = KWH/1000, + MSOACode, LAName) + ] + + +beisGasDT <- data.table::fread("~/data/beis/MSOA_DOM_GAS_csv/MSOA_GAS_2018.csv") +sotonGasDT <- beisGasDT[LAName %like% "Southampton", .(nGasMeters = METERS, + beisGasMWh = KWH/1000, + MSOACode)] + +setkey(sotonElecDT, MSOACode) +setkey(sotonGasDT, MSOACode) +sotonEnergyDT <- sotonGasDT[sotonElecDT] +sotonEnergyDT[, beisEnergyMWh := beisElecMWh + beisGasMWh] +#head(sotonEnergyDT) +message("Example data (retained variables)")</code></pre> +<pre><code>## Example data (retained variables)</code></pre> +<pre class="r"><code>head(sotonEnergyDT)</code></pre> +<pre><code>## nGasMeters beisGasMWh MSOACode nElecMeters 
beisElecMWh LAName beisEnergyMWh +## 1: 2557 38480.93 E02003549 2832 11196.005 Southampton 49676.93 +## 2: 2876 28049.73 E02003550 3527 13074.440 Southampton 41124.17 +## 3: 1649 17358.87 E02003551 2446 8957.742 Southampton 26316.61 +## 4: 2009 17667.12 E02003552 2809 10383.889 Southampton 28051.01 +## 5: 2303 24996.91 E02003553 2464 8479.993 Southampton 33476.91 +## 6: 2378 29664.71 E02003554 2873 10048.060 Southampton 39712.77</code></pre> +</div> +<div id="census-data" class="section level2"> +<h2><span class="header-section-number">2.4</span> Census data</h2> +<p>Load Census 2011 tenure data.</p> +<p>Source: <a href="https://www.nomisweb.co.uk/census/2011/ks402ew" class="uri">https://www.nomisweb.co.uk/census/2011/ks402ew</a></p> +<pre class="r"><code># census tenure ---- +dt <- data.table::fread(path.expand("~/data/census2011/2011_MSOA_householdTenure_Soton.csv")) + +dt[, census2011_socialRent := `Tenure: Social rented; measures: Value`] +dt[, census2011_privateRent := `Tenure: Private rented; measures: Value`] +dt[, census2011_ownerOccupy := `Tenure: Owned; measures: Value`] +dt[, census2011_other := `Tenure: Living rent free; measures: Value`] +dt[, MSOACode := `geography code`] + +dt[, hhCheck := census2011_socialRent + census2011_privateRent + census2011_ownerOccupy + census2011_other] +dt[, nHHs_tenure := `Tenure: All households; measures: Value`] + +dt[, socRent_pc := 100*(census2011_socialRent/nHHs_tenure)] +dt[, privRent_pc := 100*(census2011_privateRent/nHHs_tenure)] +dt[, ownerOcc_pc := 100*(census2011_ownerOccupy/nHHs_tenure)] + +tenureDT <- dt[, .(MSOACode, nHHs_tenure, socRent_pc, privRent_pc, ownerOcc_pc)] +message("Example data (retained variables)")</code></pre> +<pre><code>## Example data (retained variables)</code></pre> +<pre class="r"><code>head(tenureDT) # all tenure data</code></pre> +<pre><code>## MSOACode nHHs_tenure socRent_pc privRent_pc ownerOcc_pc +## 1: E02002559 3646 4.443225 16.346681 77.53703 +## 2: E02002560 2511 2.907208 
12.385504 83.91079 +## 3: E02002561 2507 11.408057 8.575987 79.17830 +## 4: E02002562 2933 23.389021 26.321173 49.36925 +## 5: E02002563 2343 23.772941 8.664106 65.98378 +## 6: E02002564 4137 29.804206 8.581097 60.04351</code></pre> +</div> +<div id="deprivation-data" class="section level2"> +<h2><span class="header-section-number">2.5</span> Deprivation data</h2> +<p>Load Census 2011 household deprivation data.</p> +<p>Source: <a href="https://www.nomisweb.co.uk/census/2011/qs119ew" class="uri">https://www.nomisweb.co.uk/census/2011/qs119ew</a></p> +<pre class="r"><code># add the deprivation data by MSOA +dt <- data.table::fread(path.expand("~/data/census2011/2011_MSOA_deprivation.csv")) +dt[, nHHs_deprivation := `Household Deprivation: All categories: Classification of household deprivation; measures: Value`] +dt[, MSOACode := `geography code`] + +#sotonDep_DT[, .(nHouseholds = sum(totalHouseholds)), keyby = .(LAName)] + +dt[, dep0_pc := 100*(`Household Deprivation: Household is not deprived in any dimension; measures: Value`/nHHs_deprivation)] +dt[, dep1_pc := 100*(`Household Deprivation: Household is deprived in 1 dimension; measures: Value`/nHHs_deprivation)] +dt[, dep2_pc := 100*(`Household Deprivation: Household is deprived in 2 dimensions; measures: Value`/nHHs_deprivation)] +dt[, dep3_pc := 100*(`Household Deprivation: Household is deprived in 3 dimensions; measures: Value`/nHHs_deprivation)] +dt[, dep4_pc := 100*(`Household Deprivation: Household is deprived in 4 dimensions; measures: Value`/nHHs_deprivation)] + +deprivationDT <- dt[, .(MSOACode, nHHs_deprivation, dep0_pc, dep1_pc, dep2_pc, dep3_pc, dep4_pc)] +# sneak the LA name in there too +dt <- sotonEnergyDT[,.(MSOACode,LAName)] +setkey(dt, MSOACode) +setkey(deprivationDT, MSOACode) + +sotonDeprivationDT <- deprivationDT[dt] # has the side effect of dropping non-Soton MSOAs + +message("Example data (retained variables)")</code></pre> +<pre><code>## Example data (retained variables)</code></pre> +<pre
class="r"><code>head(sotonDeprivationDT)</code></pre> +<pre><code>## MSOACode nHHs_deprivation dep0_pc dep1_pc dep2_pc dep3_pc dep4_pc LAName +## 1: E02003549 2849 52.36925 32.88873 12.24991 2.316602 0.1755002 Southampton +## 2: E02003550 3216 43.09701 32.92910 18.19030 5.254975 0.5286070 Southampton +## 3: E02003551 2256 33.68794 33.99823 23.00532 8.289007 1.0195035 Southampton +## 4: E02003552 2646 28.11791 32.01058 29.28949 9.901738 0.6802721 Southampton +## 5: E02003553 2394 39.01420 32.95739 19.88304 6.975773 1.1695906 Southampton +## 6: E02003554 2646 46.48526 32.38851 17.27135 3.514739 0.3401361 Southampton</code></pre> +<pre class="r"><code># merge with census for future use +setkey(sotonDeprivationDT, MSOACode) +setkey(tenureDT, MSOACode) +sotonCensus2011_DT <- tenureDT[sotonDeprivationDT] # only Soton MSOAs</code></pre> +</div> +</div> +<div id="data-checks" class="section level1"> +<h1><span class="header-section-number">3</span> Data checks</h1> +<div id="epc-check-energy_consumption_current" class="section level2"> +<h2><span class="header-section-number">3.1</span> EPC: Check ENERGY_CONSUMPTION_CURRENT</h2> <p>We recode the current energy consumption into categories for comparison with other low values and the presence of wind turbines/PV. We use -ve, 0 and 1 kWh as the thresholds of interest.</p> <pre class="r"><code>ggplot2::ggplot(sotonUniqueEPCsDT, aes(x = ENERGY_CONSUMPTION_CURRENT)) + geom_histogram(binwidth = 5) + @@ -3039,8 +3167,8 @@ Figure 3.2: Comparing distributions of ENERGY_CONSUMPTION_CURRENT by tenure and </p> </div> </div> -<div id="check-co2_emissions_current" class="section level2"> -<h2><span class="header-section-number">3.2</span> Check CO2_EMISSIONS_CURRENT</h2> +<div id="epc-check-co2_emissions_current" class="section level2"> +<h2><span class="header-section-number">3.2</span> EPC: Check CO2_EMISSIONS_CURRENT</h2> <p>Next we do the same for current emissions. 
Repeat the coding for total floor area using 0 and 1 TCO2/y as the threshold of interest.</p> <pre class="r"><code>ggplot2::ggplot(sotonUniqueEPCsDT, aes(x = CO2_EMISSIONS_CURRENT)) + geom_histogram(binwidth = 1)</code></pre> @@ -3441,10 +3569,10 @@ NA </table> <p>There are 22 properties with 0 or negative emissions. It looks like they are also the properties with -ve kWh as we might expect. So we can safely ignore them.</p> </div> -<div id="check-environment_impact_current" class="section level2"> -<h2><span class="header-section-number">3.3</span> Check ENVIRONMENT_IMPACT_CURRENT</h2> +<div id="epc-check-environment_impact_current" class="section level2"> +<h2><span class="header-section-number">3.3</span> EPC: Check ENVIRONMENT_IMPACT_CURRENT</h2> <p><code>Environmental impact</code> should decrease as emissions increase.</p> -<pre class="r"><code>ggplot2::ggplot(allEPCs_DT, aes(x = ENVIRONMENT_IMPACT_CURRENT)) + +<pre class="r"><code>ggplot2::ggplot(sotonEPCsDT, aes(x = ENVIRONMENT_IMPACT_CURRENT)) + geom_histogram()</code></pre> <pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre> <div class="figure"><span id="fig:checkImpact"></span> @@ -3454,7 +3582,7 @@ Figure 3.4: Histogram of ENVIRONMENT_IMPACT_CURRENT </p> </div> <p>So what is the relationship between ENVIRONMENT_IMPACT_CURRENT and CO2_EMISSIONS_CURRENT? 
It is not linear… (Figure <a href="#fig:checkEmissionsImpact">3.5</a>) and there are some interesting outliers.</p> -<pre class="r"><code>ggplot2::ggplot(allEPCs_DT, aes(x = CO2_EMISSIONS_CURRENT, +<pre class="r"><code>ggplot2::ggplot(sotonEPCsDT, aes(x = CO2_EMISSIONS_CURRENT, y = ENVIRONMENT_IMPACT_CURRENT, colour = TENURE)) + geom_point() + @@ -3467,8 +3595,8 @@ Figure 3.5: Plot of ENVIRONMENT_IMPACT_CURRENT vs CO2_EMISSIONS_CURRENT </p> </div> </div> -<div id="check-total_floor_area" class="section level2"> -<h2><span class="header-section-number">3.4</span> Check TOTAL_FLOOR_AREA</h2> +<div id="epc-check-total_floor_area" class="section level2"> +<h2><span class="header-section-number">3.4</span> EPC: Check TOTAL_FLOOR_AREA</h2> <p>Repeat the coding for total floor area using 5 m2 as the threshold of interest.</p> <pre class="r"><code>ggplot2::ggplot(sotonUniqueEPCsDT, aes(x = TOTAL_FLOOR_AREA)) + geom_histogram(binwidth = 1)</code></pre> @@ -3482,8 +3610,8 @@ Figure 3.6: Histogram of TOTAL_FLOOR_AREA sotonUniqueEPCsDT[, floorFlag := ifelse(TOTAL_FLOOR_AREA == 0, "0 m2", NA)] sotonUniqueEPCsDT[, floorFlag := ifelse(TOTAL_FLOOR_AREA > 0 & - TOTAL_FLOOR_AREA <= 10, "0-5 m2", floorFlag)] -sotonUniqueEPCsDT[, floorFlag := ifelse(TOTAL_FLOOR_AREA > 10, "5+ m2", floorFlag)] + TOTAL_FLOOR_AREA <= 5, "0-5 m2", floorFlag)] +sotonUniqueEPCsDT[, floorFlag := ifelse(TOTAL_FLOOR_AREA > 5, "5+ m2", floorFlag)] t <- with(sotonUniqueEPCsDT, table(floorFlag, consFlag)) @@ -3533,7 +3661,7 @@ kableExtra::kable(round(100*prop.table(t),2), caption = "% properties with 0 </td> <td style="text-align:right;"> -0.02 +0.00 </td> </tr> <tr> @@ -3547,7 +3675,7 @@ kableExtra::kable(round(100*prop.table(t),2), caption = "% properties with 0 </td> <td style="text-align:right;"> -99.86 +99.87 </td> </tr> </tbody> @@ -3578,7 +3706,7 @@ ENERGY_CONSUMPTION_CURRENT <tbody> <tr> <td style="text-align:right;"> -4.697565e-314 +9507976768 </td> <td style="text-align:left;"> House @@ -3592,7 +3720,7 
@@ House </tr> <tr> <td style="text-align:right;"> -1.894551e-314 +3834614378 </td> <td style="text-align:left;"> House @@ -3606,7 +3734,7 @@ House </tr> <tr> <td style="text-align:right;"> -4.846111e-314 +9808638568 </td> <td style="text-align:left;"> House @@ -3620,7 +3748,7 @@ House </tr> <tr> <td style="text-align:right;"> -2.559778e-314 +5181048568 </td> <td style="text-align:left;"> House @@ -3634,7 +3762,7 @@ House </tr> <tr> <td style="text-align:right;"> -8.172097e-315 +1654050778 </td> <td style="text-align:left;"> House @@ -3648,7 +3776,7 @@ House </tr> <tr> <td style="text-align:right;"> -4.440838e-315 +898835568 </td> <td style="text-align:left;"> House @@ -3662,7 +3790,7 @@ House </tr> <tr> <td style="text-align:right;"> -4.076249e-314 +8250419178 </td> <td style="text-align:left;"> House @@ -3676,7 +3804,7 @@ House </tr> <tr> <td style="text-align:right;"> -1.933817e-314 +3914088378 </td> <td style="text-align:left;"> House @@ -3690,7 +3818,7 @@ House </tr> <tr> <td style="text-align:right;"> -1.280057e-314 +2590863278 </td> <td style="text-align:left;"> House @@ -3704,7 +3832,7 @@ House </tr> <tr> <td style="text-align:right;"> -2.444460e-314 +4947642078 </td> <td style="text-align:left;"> Flat @@ -3744,7 +3872,7 @@ ENERGY_CONSUMPTION_CURRENT <tbody> <tr> <td style="text-align:right;"> -9.111592e-316 +184420668 </td> <td style="text-align:left;"> Flat @@ -3758,7 +3886,7 @@ Flat </tr> <tr> <td style="text-align:right;"> -3.102124e-315 +627876968 </td> <td style="text-align:left;"> Flat @@ -3772,7 +3900,7 @@ Flat </tr> <tr> <td style="text-align:right;"> -3.294384e-315 +666790668 </td> <td style="text-align:left;"> Flat @@ -3786,7 +3914,7 @@ Flat </tr> <tr> <td style="text-align:right;"> -3.695003e-315 +747876968 </td> <td style="text-align:left;"> Flat @@ -3800,7 +3928,7 @@ Flat </tr> <tr> <td style="text-align:right;"> -4.369619e-315 +884420668 </td> <td style="text-align:left;"> Flat @@ -3814,7 +3942,7 @@ Flat </tr> <tr> <td 
style="text-align:right;"> -4.371685e-315 +884838868 </td> <td style="text-align:left;"> Flat @@ -3828,7 +3956,7 @@ Flat </tr> <tr> <td style="text-align:right;"> -7.302298e-315 +1478001668 </td> <td style="text-align:left;"> Flat @@ -3842,7 +3970,7 @@ Flat </tr> <tr> <td style="text-align:right;"> -9.515727e-315 +1926004568 </td> <td style="text-align:left;"> Flat @@ -3856,7 +3984,7 @@ Flat </tr> <tr> <td style="text-align:right;"> -9.562249e-315 +1935420668 </td> <td style="text-align:left;"> Flat @@ -3870,7 +3998,7 @@ Flat </tr> <tr> <td style="text-align:right;"> -1.059979e-314 +2145420668 </td> <td style="text-align:left;"> Flat @@ -3930,7 +4058,7 @@ Flat 0 </td> <td style="text-align:right;"> -0.02 +0.00 </td> </tr> <tr> @@ -3944,7 +4072,7 @@ Flat 0 </td> <td style="text-align:right;"> -99.86 +99.87 </td> </tr> </tbody> @@ -3952,459 +4080,663 @@ Flat <p>Table <a href="#tab:checkEmissions">3.2</a> shows that the properties with floor area of < 10m2 are not necessarily the ones with 0 or negative kWh values. Nevertheless they represent a small proportion of all properties.</p> <p>The scale of the x axis also suggests a few very large properties.</p> </div> -<div id="data-summary" class="section level2"> -<h2><span class="header-section-number">3.5</span> Data summary</h2> -<p>We have identified some issues with a small number of the properties in the EPC dataset. These are not unexpected given that much of the estimates rely on partial or presumed data. Data entry errors are also quite likely. 
As a result we exclude:</p> -<ul> -<li>any property where ENERGY_CONSUMPTION_CURRENT <= 0</li> -<li>any property where TOTAL_FLOOR_AREA <= 5</li> -<li>any property where CO2_EMISSIONS_CURRENT <= 0</li> -</ul> -<pre class="r"><code>finalEPCDT <- sotonUniqueEPCsDT[ENERGY_CONSUMPTION_CURRENT > 0 & - TOTAL_FLOOR_AREA > 5 & - CO2_EMISSIONS_CURRENT > 0] +<div id="epc-check-missing-epc-rates" class="section level2"> +<h2><span class="header-section-number">3.5</span> EPC: Check ‘missing’ EPC rates</h2> +<p>We know that we do not have EPC records for every dwelling. But how many are we missing? We will check this at MSOA level as it allows us to link to other MSOA level datasets that tell us how many households, dwellings or energy meters to expect. Arguably it would be better to do this at LSOA level but…</p> +<p>First we’ll use the BEIS 2018 MSOA level annual electricity data to estimate the number of meters (not properties) - some addresses can have 2 meters (e.g. standard & economy 7). However this is more useful than the number of gas meters since not all dwellings have mains gas but all (should?) have an electricity meter.</p> +<pre class="r"><code>sotonEnergyDT[, .(nElecMeters = sum(nElecMeters), + nGasMeters = sum(nGasMeters)), keyby = .(LAName)]</code></pre> +<pre><code>## LAName nElecMeters nGasMeters +## 1: Southampton 108333 81645</code></pre> +<p>Next we’ll check for the number of households reported by the 2011 Census.</p> +<blockquote> +<p>would be better to use dwellings but this gives us tenure as well</p> +</blockquote> +<pre class="r"><code>#censusDT <- data.table::fread(path.expand("~/data/")) -skimr::skim(finalEPCDT)</code></pre> -<pre><code>## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. 
Falling back to -## `character`.</code></pre> -<table style="width: auto;" class="table table-condensed"> +t <- sotonCensus2011_DT[, .(sum_Deprivation = sum(nHHs_deprivation), + sum_Tenure = sum(nHHs_tenure)), keyby = .(LAName)] +kableExtra::kable(t, caption = "Census derived household counts")</code></pre> +<table> <caption> -<span id="tab:finalData">Table 3.4: </span>Data summary +<span id="tab:checkCensus">Table 3.4: </span>Census derived household counts </caption> <thead> <tr> <th style="text-align:left;"> +LAName </th> -<th style="text-align:left;"> +<th style="text-align:right;"> +sum_Deprivation +</th> +<th style="text-align:right;"> +sum_Tenure </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> -Name -</td> -<td style="text-align:left;"> -finalEPCDT -</td> -</tr> -<tr> -<td style="text-align:left;"> -Number of rows -</td> -<td style="text-align:left;"> -71502 -</td> -</tr> -<tr> -<td style="text-align:left;"> -Number of columns -</td> -<td style="text-align:left;"> -20 -</td> -</tr> -<tr> -<td style="text-align:left;"> -_______________________ -</td> -<td style="text-align:left;"> -</td> -</tr> -<tr> -<td style="text-align:left;"> -Column type frequency: -</td> -<td style="text-align:left;"> -</td> -</tr> -<tr> -<td style="text-align:left;"> -character -</td> -<td style="text-align:left;"> -12 -</td> -</tr> -<tr> -<td style="text-align:left;"> -Date -</td> -<td style="text-align:left;"> -2 -</td> -</tr> -<tr> -<td style="text-align:left;"> -numeric -</td> -<td style="text-align:left;"> -6 -</td> -</tr> -<tr> -<td style="text-align:left;"> -________________________ -</td> -<td style="text-align:left;"> +Southampton </td> -</tr> -<tr> -<td style="text-align:left;"> -Group variables +<td style="text-align:right;"> +98254 </td> -<td style="text-align:left;"> -None +<td style="text-align:right;"> +98254 </td> </tr> </tbody> </table> -<p><strong>Variable type: character</strong></p> -<table> +<p>That’s lower (as expected) but doesn’t allow for 
dwellings that were empty on census night.</p> +<pre class="r"><code># Postcodes don't help - no count of addresses in the data (there used to be??) +# but we can use it to check which Soton postcodes are missing from the EPC file + +sotonPostcodesReducedDT[, c("pc_chunk1","pc_chunk2" ) := tstrsplit(pcds, + split = " " + ) + ] +sotonPostcodesReducedDT[, .(nEPCs = .N), keyby = .(pc_chunk1)]</code></pre> +<pre><code>## pc_chunk1 nEPCs +## 1: SO1 2343 +## 2: SO14 1380 +## 3: SO15 1801 +## 4: SO16 1648 +## 5: SO17 602 +## 6: SO18 1208 +## 7: SO19 1398 +## 8: SO2 2737 +## 9: SO3 3 +## 10: SO4 13 +## 11: SO45 2 +## 12: SO9 1093</code></pre> +<p>We should not have single digit postcodes in the postcode data - i.e. SO1 should not be there (since 1993). Southampton City is unusual in only having <a href="https://en.wikipedia.org/wiki/SO_postcode_area">double digit postcodes</a>.</p> +<pre class="r"><code># EPC +# set up counters +# use final cleaned EPC data +sotonUniqueEPCsDT[, epcIsSocialRent := ifelse(TENURE == "rental (social)", 1, 0)] +sotonUniqueEPCsDT[, epcIsPrivateRent := ifelse(TENURE == "rental (private)", 1, 0)] +sotonUniqueEPCsDT[, epcIsOwnerOcc := ifelse(TENURE == "owner-occupied", 1, 0)] +sotonUniqueEPCsDT[, epcIsUnknownTenure := ifelse(TENURE == "NO DATA!"
| + TENURE == "" , 1, 0)] +# aggregate EPCs to postcodes +sotonEpcPostcodes_DT <- sotonUniqueEPCsDT[, .(nEPCs = .N, + sumEPC_tCO2 = sum(CO2_EMISSIONS_CURRENT, na.rm = TRUE), + n_epcIsSocialRent = sum(epcIsSocialRent, na.rm = TRUE), + n_epcIsPrivateRent = sum(epcIsPrivateRent, na.rm = TRUE), + n_epcIsOwnerOcc = sum(epcIsOwnerOcc, na.rm = TRUE), + n_epcIsUnknownTenure = sum(epcIsUnknownTenure, na.rm = TRUE), + sumEpcMWh = sum(ENERGY_CONSUMPTION_CURRENT* TOTAL_FLOOR_AREA)/1000), # crucial as ENERGY_CONSUMPTION_CURRENT = kWh/m2 + keyby = .(POSTCODE, LOCAL_AUTHORITY_LABEL)] + +sotonEpcPostcodes_DT[, c("pc_chunk1","pc_chunk2" ) := tstrsplit(POSTCODE, + split = " " + ) + ] +sotonEpcPostcodes_DT[, .(nEPCs = .N), keyby = .(pc_chunk1)]</code></pre> +<pre><code>## pc_chunk1 nEPCs +## 1: SO14 601 +## 2: SO15 960 +## 3: SO16 1245 +## 4: SO17 403 +## 5: SO18 776 +## 6: SO19 1122</code></pre> +<pre class="r"><code># check original EPC data for Soton - which postcodes are covered? +sotonEPCsDT[, c("pc_chunk1","pc_chunk2" ) := tstrsplit(POSTCODE, + split = " " + ) + ] +sotonEPCsDT[, .(nEPCs = .N), keyby = .(pc_chunk1)]</code></pre> +<pre><code>## pc_chunk1 nEPCs +## 1: SO14 14213 +## 2: SO15 17855 +## 3: SO16 20270 +## 4: SO17 8446 +## 5: SO18 10661 +## 6: SO19 20388</code></pre> +<p>It looks like we have EPCs for each postcode sector which is good.</p> +<pre class="r"><code># match the EPC postcode summaries to the postcode extract +sotonPostcodesReducedDT[, POSTCODE_s := stringr::str_remove(pcds, " ")] +setkey(sotonPostcodesReducedDT, POSTCODE_s) +sotonPostcodesReducedDT[, MSOACode := msoa11] +message("Number of postcodes: ",uniqueN(sotonPostcodesReducedDT$POSTCODE_s))</code></pre> +<pre><code>## Number of postcodes: 14228</code></pre> +<pre class="r"><code>sotonEpcPostcodes_DT[, POSTCODE_s := stringr::str_remove(POSTCODE, " ")] +setkey(sotonEpcPostcodes_DT, POSTCODE_s) +message("Number of postcodes with EPCs: ",uniqueN(sotonEpcPostcodes_DT$POSTCODE_s))</code></pre> 
+<pre><code>## Number of postcodes with EPCs: 5107</code></pre> +<pre class="r"><code>dt <- sotonEpcPostcodes_DT[sotonPostcodesReducedDT] + +# aggregate to MSOA - watch for NAs where no EPCs in a given postcode +sotonEpcMSOA_DT <- dt[, .(nEPCs = sum(nEPCs, na.rm = TRUE), + sumEPC_tCO2 = sum(sumEPC_tCO2, na.rm = TRUE), + n_epcIsSocialRent = sum(n_epcIsSocialRent, na.rm = TRUE), + n_epcIsPrivateRent = sum(n_epcIsPrivateRent, na.rm = TRUE), + n_epcIsOwnerOcc = sum(n_epcIsOwnerOcc, na.rm = TRUE), + n_epcIsUnknownTenure = sum(n_epcIsUnknownTenure, na.rm = TRUE), + sumEpcMWh = sum(sumEpcMWh, na.rm = TRUE) + ), + keyby = .(MSOACode) # change name on the fly for easier matching + ] + +#summary(sotonEpcMSOA_DT)</code></pre> +<p>So we have some postcodes with no EPCs.</p> +<p>Join the estimates together at MSOA level for comparison. There are 32 MSOAs in Southampton.</p> +<pre class="r"><code># 32 MSOAs in Soton +# add census & deprivation to energy +setkey(sotonEnergyDT, MSOACode) +setkey(sotonCensus2011_DT, MSOACode) +setkey(sotonEpcMSOA_DT, MSOACode) + +sotonMSOA_DT <- sotonCensus2011_DT[sotonEnergyDT] +#names(sotonMSOA_DT) +sotonMSOA_DT <- sotonEpcMSOA_DT[sotonMSOA_DT] +#names(sotonMSOA_DT) + +# add MSOA names from the postcode LUT + +msoaNamesDT <- data.table::as.data.table(readxl::read_xlsx(path.expand("~/data/UK_postcodes/NSPL_AUG_2020_UK/Documents/MSOA (2011) names and codes UK as at 12_12.xlsx"))) +msoaNamesDT[, MSOACode := MSOA11CD] +msoaNamesDT[, MSOAName := MSOA11NM] + +setkey(msoaNamesDT, MSOACode) +sotonMSOA_DT <- msoaNamesDT[sotonMSOA_DT] + +#names(sotonMSOA_DT)</code></pre> +<pre class="r"><code>t <- sotonMSOA_DT[, .(nHouseholds_2011 = sum(nHHs_tenure), + nElecMeters_2018 = sum(nElecMeters), + nEPCs_2020 = sum(nEPCs)), keyby = .(LAName)] + +kableExtra::kable(t, caption = "Comparison of different estimates of the number of dwellings") %>% + kable_styling()</code></pre> +<table class="table" style="margin-left: auto; margin-right: auto;"> +<caption> +<span
id="tab:compareEpcEstimates">Table 3.5: </span>Comparison of different estimates of the number of dwellings +</caption> <thead> <tr> <th style="text-align:left;"> -skim_variable +LAName </th> <th style="text-align:right;"> -n_missing +nHouseholds_2011 </th> <th style="text-align:right;"> -complete_rate +nElecMeters_2018 </th> <th style="text-align:right;"> -min +nEPCs_2020 +</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align:left;"> +Southampton +</td> +<td style="text-align:right;"> +98254 +</td> +<td style="text-align:right;"> +108333 +</td> +<td style="text-align:right;"> +71527 +</td> +</tr> +</tbody> +</table> +<pre class="r"><code>nHouseholds_2011f <- sum(sotonMSOA_DT$nHHs_tenure) +nElecMeters_2018f <- sum(sotonMSOA_DT$nElecMeters) +nEPCs_2020f <- sum(sotonMSOA_DT$nEPCs) + +makePC <- function(x,y,r){ + # make a percent of x/y and round it to r decimal places + pc <- round(100*(x/y),r) + return(pc) +}</code></pre> +<p>From this we calculate that the number of EPCs we have is:</p> +<ul> +<li>72.8% of Census 2011 households +<ul> +<li>66% of the recorded 2018 electricity meters</li> +</ul></li> +</ul> +<p>We can also see that despite having ‘missing’ EPCs, the estimated total EPC-derived energy demand is marginally higher than the BEIS-derived weather corrected energy demand data.
Given that the BEIS data accounts for all heating, cooking, hot water, lighting and appliance use we would expect the EPC data to be lower <em>even if no EPCs were missing…</em></p> +<pre class="r"><code>t <- sotonMSOA_DT[, .(MSOAName, MSOACode, nHHs_tenure,nElecMeters,nEPCs, + dep0_pc, socRent_pc, privRent_pc, ownerOcc_pc,sumEpcMWh, beisEnergyMWh )] + +t[, pc_missingHH := makePC(nEPCs,nHHs_tenure,1)] +t[, pc_missingMeters := makePC(nEPCs,nElecMeters,1)] +t[, pc_energyBEIS := makePC(sumEpcMWh,beisEnergyMWh,1)] + +kt1 <- t + +ggplot2::ggplot(t, aes(x = pc_missingHH, + y = pc_missingMeters, + colour = round(ownerOcc_pc))) + + geom_abline(alpha = 0.2, slope=1, intercept=0) + + geom_point() + + scale_color_continuous(name = "% owner occupiers \n(Census 2011)", high = "red", low = "green") + + #theme(legend.position = "bottom") + + labs(x = "EPCs 2020 as % of Census 2011 households", + y = "EPCs 2020 as % of electricity meters 2018", + caption = "x = y line included for clarity")</code></pre> +<div class="figure"><span id="fig:missingEPCbyMSOA"></span> +<img src="" alt="% 'missing' rates comparison" width="672" /> +<p class="caption"> +Figure 3.7: % ‘missing’ rates comparison +</p> +</div> +<pre class="r"><code>outlierMSOA <- t[pc_missingHH > 100]</code></pre> +<p>Figure <a href="#fig:missingEPCbyMSOA">3.7</a> (see Table <a href="#tab:bigMSOATable">7.1</a> below for details) suggests that rates vary considerably by MSOA but are relatively consistent across the two baseline ‘truth’ estimates with the exception of E02003577 which appears to have many more EPCs than Census 2011 households. It is worth noting that <a href="https://www.localhealth.org.uk/#c=report&chapter=c01&report=r01&selgeo1=msoa_2011.E02003577&selgeo2=eng.E92000001">this MSOA</a> covers the city centre and dock areas which have had substantial new build since 2011 and so may have households inhabiting dwellings that did not exist at Census 2011. 
This is also supported by the considerably higher EPC derived energy demand data compared to BEIS’s 2018 data - although it suggests the dwellings are either very new (since 2018) or are yet to be occupied.</p> +<p>As we would expect those MSOAs with the lowest EPC coverage on both baseline measures tend to have higher proportions of owner occupiers.</p> +<p>We can use the same approach to compare estimates of total energy demand at the MSOA level. To do this we compare:</p> +<ul> +<li>estimated total energy demand in MWh/year derived from the EPC estimates. This energy only relates to <code>current primary energy</code> (space heating, hot water and lighting) and of course also suffers from missing EPCs (see above)</li> +<li>observed electricity and gas demand collated by BEIS for their sub-national statistical series. This applies to all domestic energy demand but the most recent data is for 2018 so will suffer from the absence of dwellings that are present in the most recent EPC data (see above).</li> +</ul> +<p>We should therefore not expect the values to match but we might reasonably expect a correlation.</p> +<pre class="r"><code>ggplot2::ggplot(t, aes(x = sumEpcMWh, + y = beisEnergyMWh, + colour = round(ownerOcc_pc))) + + geom_abline(alpha = 0.2, slope=1, intercept=0) + + geom_point() + + scale_color_continuous(name = "% owner occupiers \n(Census 2011)", high = "red", low = "green") + + #theme(legend.position = "bottom") + + labs(x = "EPC 2020 derived total MWh/year", + y = "BEIS 2018 derived total MWh/year", + caption = "x = y line included for clarity")</code></pre> +<div class="figure"><span id="fig:energyMSOAPlot"></span> +<img src="" alt="Energy demand comparison" width="672" /> +<p class="caption"> +Figure 3.8: Energy demand comparison +</p> +</div> +<pre class="r"><code>outlier <- t[sumEpcMWh > 70000]</code></pre> +<p>Figure <a href="#fig:energyMSOAPlot">3.8</a> shows that both of these are true. 
MSOAs with a high proportion of owner occupiers (and therefore more likely to have missing EPCs) tend to have higher observed energy demand than the EPC data suggests - they are above the reference line. MSOAs with a lower proportion of owner occupiers (and therefore more likely to have more complete EPC coverage) tend to be on or below the line. As before we have the same notable outlier (E02003577) and for the same reasons… In this case this produces a much higher energy demand estimate than the BEIS 2018 data records.</p> +</div> +<div id="beis-check-data" class="section level2"> +<h2><span class="header-section-number">3.6</span> BEIS: Check data</h2> +<p>While we’re here we’ll also check the BEIS data. Table <a href="#tab:beisDesc">3.6</a> shows the five highest and lowest MSOAs by annual electricity use.</p> +<pre class="r"><code>t1 <- head(sotonMSOA_DT[, .(MSOA11NM, MSOA11CD, beisElecMWh, nElecMeters, + beisGasMWh, nGasMeters)][order(-beisElecMWh)],5) + +kableExtra::kable(t1, caption = "Southampton MSOAs: BEIS 2018 energy data ordered by highest electricity (top 5)") %>% + kable_styling()</code></pre> +<table class="table" style="margin-left: auto; margin-right: auto;"> +<caption> +<span id="tab:beisDesc">Table 3.6: </span>Southampton MSOAs: BEIS 2018 energy data ordered by highest electricity (top 5) +</caption> +<thead> +<tr> +<th style="text-align:left;"> +MSOA11NM +</th> +<th style="text-align:left;"> +MSOA11CD </th> <th style="text-align:right;"> -max +beisElecMWh </th> <th style="text-align:right;"> -empty +nElecMeters </th> <th style="text-align:right;"> -n_unique +beisGasMWh </th> <th style="text-align:right;"> -whitespace +nGasMeters </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> -BUILDING_REFERENCE_NUMBER -</td> -<td style="text-align:right;"> -0 -</td> -<td style="text-align:right;"> -1.00 +Southampton 029 </td> -<td style="text-align:right;"> -17 +<td style="text-align:left;"> +E02003577 </td> <td style="text-align:right;"> -21
+27352.70 </td> <td style="text-align:right;"> -0 +6734 </td> <td style="text-align:right;"> -71502 +20108.63 </td> <td style="text-align:right;"> -0 +2420 </td> </tr> <tr> <td style="text-align:left;"> -LMK_KEY -</td> -<td style="text-align:right;"> -0 -</td> -<td style="text-align:right;"> -1.00 +Southampton 014 </td> -<td style="text-align:right;"> -29 +<td style="text-align:left;"> +E02003562 </td> <td style="text-align:right;"> -34 +14757.18 </td> <td style="text-align:right;"> -0 +3921 </td> <td style="text-align:right;"> -71502 +36532.48 </td> <td style="text-align:right;"> -0 +2983 </td> </tr> <tr> <td style="text-align:left;"> -PROPERTY_TYPE -</td> -<td style="text-align:right;"> -0 -</td> -<td style="text-align:right;"> -1.00 +Southampton 022 </td> -<td style="text-align:right;"> -4 +<td style="text-align:left;"> +E02003570 </td> <td style="text-align:right;"> -10 +14719.37 </td> <td style="text-align:right;"> -0 +4142 </td> <td style="text-align:right;"> -5 +34730.60 </td> <td style="text-align:right;"> -0 +3083 </td> </tr> <tr> <td style="text-align:left;"> -BUILT_FORM -</td> -<td style="text-align:right;"> -0 -</td> -<td style="text-align:right;"> -1.00 +Southampton 031 </td> -<td style="text-align:right;"> -8 +<td style="text-align:left;"> +E02003579 </td> <td style="text-align:right;"> -20 +13860.94 </td> <td style="text-align:right;"> -0 +4460 </td> <td style="text-align:right;"> -7 +34052.12 </td> <td style="text-align:right;"> -0 +3068 </td> </tr> <tr> <td style="text-align:left;"> -TENURE -</td> -<td style="text-align:right;"> -0 -</td> -<td style="text-align:right;"> -1.00 +Southampton 021 </td> -<td style="text-align:right;"> -0 +<td style="text-align:left;"> +E02003569 </td> <td style="text-align:right;"> -16 +13719.22 </td> <td style="text-align:right;"> -1905 +3999 </td> <td style="text-align:right;"> -6 +27661.45 </td> <td style="text-align:right;"> -0 +2722 </td> </tr> +</tbody> +</table> +<pre class="r"><code>t2 <- tail(sotonMSOA_DT[, 
.(MSOA11NM, MSOA11CD, beisElecMWh, nElecMeters, + beisGasMWh, nGasMeters)][order(-beisElecMWh)],5) + +kableExtra::kable(t2, caption = "Southampton MSOAs: BEIS 2018 energy data ordered by lowest electricity (bottom 5)") %>% + kable_styling()</code></pre> +<table class="table" style="margin-left: auto; margin-right: auto;"> +<caption> +<span id="tab:beisDesc">Table 3.6: </span>Southampton MSOAs: BEIS 2018 energy data ordered by lowest electricity (bottom 5) +</caption> +<thead> +<tr> +<th style="text-align:left;"> +MSOA11NM +</th> +<th style="text-align:left;"> +MSOA11CD +</th> +<th style="text-align:right;"> +beisElecMWh +</th> +<th style="text-align:right;"> +nElecMeters +</th> +<th style="text-align:right;"> +beisGasMWh +</th> +<th style="text-align:right;"> +nGasMeters +</th> +</tr> +</thead> +<tbody> <tr> <td style="text-align:left;"> -POSTCODE -</td> -<td style="text-align:right;"> -0 +Southampton 024 </td> -<td style="text-align:right;"> -1.00 +<td style="text-align:left;"> +E02003572 </td> <td style="text-align:right;"> -8 +9347.893 </td> <td style="text-align:right;"> -8 +2597 </td> <td style="text-align:right;"> -0 +30332.49 </td> <td style="text-align:right;"> -5105 -</td> -<td style="text-align:right;"> -0 +2381 </td> </tr> <tr> <td style="text-align:left;"> -LOCAL_AUTHORITY_LABEL -</td> -<td style="text-align:right;"> -0 -</td> -<td style="text-align:right;"> -1.00 +Southampton 018 </td> -<td style="text-align:right;"> -11 +<td style="text-align:left;"> +E02003566 </td> <td style="text-align:right;"> -11 +9221.544 </td> <td style="text-align:right;"> -0 +2831 </td> <td style="text-align:right;"> -1 +26826.22 </td> <td style="text-align:right;"> -0 +2607 </td> </tr> <tr> <td style="text-align:left;"> -hasWind -</td> -<td style="text-align:right;"> -5546 -</td> -<td style="text-align:right;"> -0.92 +Southampton 008 </td> -<td style="text-align:right;"> -2 +<td style="text-align:left;"> +E02003556 </td> <td style="text-align:right;"> -3 +9199.673 </td> <td 
style="text-align:right;"> -0 +2589 </td> <td style="text-align:right;"> -2 +26412.36 </td> <td style="text-align:right;"> -0 +2295 </td> </tr> <tr> <td style="text-align:left;"> -hasPV -</td> -<td style="text-align:right;"> -38495 -</td> -<td style="text-align:right;"> -0.46 +Southampton 003 </td> -<td style="text-align:right;"> -2 +<td style="text-align:left;"> +E02003551 </td> <td style="text-align:right;"> -3 +8957.742 </td> <td style="text-align:right;"> -0 +2446 </td> <td style="text-align:right;"> -2 +17358.87 </td> <td style="text-align:right;"> -0 +1649 </td> </tr> <tr> <td style="text-align:left;"> -consFlag +Southampton 005 </td> -<td style="text-align:right;"> -0 +<td style="text-align:left;"> +E02003553 </td> <td style="text-align:right;"> -1.00 +8479.993 </td> <td style="text-align:right;"> -8 +2464 </td> <td style="text-align:right;"> -8 +24996.91 </td> <td style="text-align:right;"> -0 +2303 </td> -<td style="text-align:right;"> -1 +</tr> +</tbody> +</table> +</div> +</div> +<div id="summarise-and-save-epc-data-for-re-use" class="section level1"> +<h1><span class="header-section-number">4</span> Summarise and save EPC data for re-use</h1> +<p>We have identified some issues with a small number of the properties in the EPC dataset. These are not unexpected given that much of the estimates rely on partial or presumed data. Data entry errors are also quite likely. As a result we exclude:</p> +<ul> +<li>any property where ENERGY_CONSUMPTION_CURRENT <= 0</li> +<li>any property where TOTAL_FLOOR_AREA <= 5</li> +<li>any property where CO2_EMISSIONS_CURRENT <= 0</li> +</ul> +<pre class="r"><code>finalEPCDT <- sotonUniqueEPCsDT[ENERGY_CONSUMPTION_CURRENT > 0 & + TOTAL_FLOOR_AREA > 5 & + CO2_EMISSIONS_CURRENT > 0] + +skimr::skim(finalEPCDT)</code></pre> +<pre><code>## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. 
Falling back to +## `character`.</code></pre> +<table style="width: auto;" class="table table-condensed"> +<caption> +<span id="tab:finalData">Table 4.1: </span>Data summary +</caption> +<thead> +<tr> +<th style="text-align:left;"> +</th> +<th style="text-align:left;"> +</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align:left;"> +Name </td> -<td style="text-align:right;"> -0 +<td style="text-align:left;"> +finalEPCDT </td> </tr> <tr> <td style="text-align:left;"> -emissionsFlag +Number of rows </td> -<td style="text-align:right;"> -0 +<td style="text-align:left;"> +71502 </td> -<td style="text-align:right;"> -1.00 +</tr> +<tr> +<td style="text-align:left;"> +Number of columns </td> -<td style="text-align:right;"> -9 +<td style="text-align:left;"> +24 </td> -<td style="text-align:right;"> -10 +</tr> +<tr> +<td style="text-align:left;"> +_______________________ </td> -<td style="text-align:right;"> -0 +<td style="text-align:left;"> </td> -<td style="text-align:right;"> -2 +</tr> +<tr> +<td style="text-align:left;"> +Column type frequency: </td> -<td style="text-align:right;"> -0 +<td style="text-align:left;"> </td> </tr> <tr> <td style="text-align:left;"> -floorFlag +character </td> -<td style="text-align:right;"> -0 +<td style="text-align:left;"> +12 </td> -<td style="text-align:right;"> -1.00 +</tr> +<tr> +<td style="text-align:left;"> +Date </td> -<td style="text-align:right;"> -5 +<td style="text-align:left;"> +2 </td> -<td style="text-align:right;"> -6 +</tr> +<tr> +<td style="text-align:left;"> +numeric </td> -<td style="text-align:right;"> -0 +<td style="text-align:left;"> +10 </td> -<td style="text-align:right;"> -2 +</tr> +<tr> +<td style="text-align:left;"> +________________________ </td> -<td style="text-align:right;"> -0 +<td style="text-align:left;"> +</td> +</tr> +<tr> +<td style="text-align:left;"> +Group variables +</td> +<td style="text-align:left;"> +None </td> </tr> </tbody> </table> -<p><strong>Variable type: Date</strong></p> 
+<p><strong>Variable type: character</strong></p> <table> <thead> <tr> @@ -4417,112 +4749,79 @@ n_missing <th style="text-align:right;"> complete_rate </th> -<th style="text-align:left;"> +<th style="text-align:right;"> min </th> -<th style="text-align:left;"> +<th style="text-align:right;"> max </th> -<th style="text-align:left;"> -median +<th style="text-align:right;"> +empty </th> <th style="text-align:right;"> n_unique </th> +<th style="text-align:right;"> +whitespace +</th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> -LODGEMENT_DATE +BUILDING_REFERENCE_NUMBER </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> -1 +1.00 </td> -<td style="text-align:left;"> -2008-10-01 +<td style="text-align:right;"> +17 </td> -<td style="text-align:left;"> -2020-06-30 +<td style="text-align:right;"> +21 </td> -<td style="text-align:left;"> -2014-10-22 +<td style="text-align:right;"> +0 </td> <td style="text-align:right;"> -4132 +71502 +</td> +<td style="text-align:right;"> +0 </td> </tr> <tr> <td style="text-align:left;"> -INSPECTION_DATE +LMK_KEY </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> -1 +1.00 </td> -<td style="text-align:left;"> -2007-03-02 +<td style="text-align:right;"> +29 </td> -<td style="text-align:left;"> -2020-06-30 +<td style="text-align:right;"> +34 </td> -<td style="text-align:left;"> -2014-10-14 +<td style="text-align:right;"> +0 </td> <td style="text-align:right;"> -3906 +71502 +</td> +<td style="text-align:right;"> +0 </td> </tr> -</tbody> -</table> -<p><strong>Variable type: numeric</strong></p> -<table> -<thead> -<tr> -<th style="text-align:left;"> -skim_variable -</th> -<th style="text-align:right;"> -n_missing -</th> -<th style="text-align:right;"> -complete_rate -</th> -<th style="text-align:right;"> -mean -</th> -<th style="text-align:right;"> -sd -</th> -<th style="text-align:right;"> -p0 -</th> -<th style="text-align:right;"> -p25 -</th> -<th style="text-align:right;"> -p50 -</th> 
-<th style="text-align:right;"> -p75 -</th> -<th style="text-align:right;"> -p100 -</th> -<th style="text-align:left;"> -hist -</th> -</tr> -</thead> -<tbody> <tr> <td style="text-align:left;"> -ENVIRONMENT_IMPACT_CURRENT +PROPERTY_TYPE </td> <td style="text-align:right;"> 0 @@ -4531,33 +4830,50 @@ ENVIRONMENT_IMPACT_CURRENT 1.00 </td> <td style="text-align:right;"> -62.51 +4 </td> <td style="text-align:right;"> -15.72 +10 +</td> +<td style="text-align:right;"> +0 +</td> +<td style="text-align:right;"> +5 +</td> +<td style="text-align:right;"> +0 +</td> +</tr> +<tr> +<td style="text-align:left;"> +BUILT_FORM +</td> +<td style="text-align:right;"> +0 </td> <td style="text-align:right;"> 1.00 </td> <td style="text-align:right;"> -52.0 +8 </td> <td style="text-align:right;"> -63.00 +20 </td> <td style="text-align:right;"> -73 +0 </td> <td style="text-align:right;"> -100.00 +7 </td> -<td style="text-align:left;"> -â–▂▆▇▂ +<td style="text-align:right;"> +0 </td> </tr> <tr> <td style="text-align:left;"> -ENERGY_CONSUMPTION_CURRENT +TENURE </td> <td style="text-align:right;"> 0 @@ -4566,33 +4882,50 @@ ENERGY_CONSUMPTION_CURRENT 1.00 </td> <td style="text-align:right;"> -263.23 +0 </td> <td style="text-align:right;"> -140.47 +16 </td> <td style="text-align:right;"> -4.00 +1905 </td> <td style="text-align:right;"> -174.0 +6 </td> <td style="text-align:right;"> -233.00 +0 +</td> +</tr> +<tr> +<td style="text-align:left;"> +POSTCODE </td> <td style="text-align:right;"> -327 +0 </td> <td style="text-align:right;"> -1597.00 +1.00 </td> -<td style="text-align:left;"> -▇▂â–â–â– +<td style="text-align:right;"> +8 +</td> +<td style="text-align:right;"> +8 +</td> +<td style="text-align:right;"> +0 +</td> +<td style="text-align:right;"> +5105 +</td> +<td style="text-align:right;"> +0 </td> </tr> <tr> <td style="text-align:left;"> -CO2_EMISSIONS_CURRENT +LOCAL_AUTHORITY_LABEL </td> <td style="text-align:right;"> 0 @@ -4601,33 +4934,50 @@ CO2_EMISSIONS_CURRENT 1.00 </td> <td 
style="text-align:right;"> -3.17 +11 </td> <td style="text-align:right;"> -1.94 +11 </td> <td style="text-align:right;"> -0.10 +0 </td> <td style="text-align:right;"> -1.8 +1 </td> <td style="text-align:right;"> -2.85 +0 +</td> +</tr> +<tr> +<td style="text-align:left;"> +hasWind </td> <td style="text-align:right;"> -4 +5546 </td> <td style="text-align:right;"> -77.00 +0.92 </td> -<td style="text-align:left;"> -â–‡â–â–â–â– +<td style="text-align:right;"> +2 +</td> +<td style="text-align:right;"> +3 +</td> +<td style="text-align:right;"> +0 +</td> +<td style="text-align:right;"> +2 +</td> +<td style="text-align:right;"> +0 </td> </tr> <tr> <td style="text-align:left;"> -PHOTO_SUPPLY +hasPV </td> <td style="text-align:right;"> 38495 @@ -4636,68 +4986,76 @@ PHOTO_SUPPLY 0.46 </td> <td style="text-align:right;"> -0.59 +2 </td> <td style="text-align:right;"> -5.11 +3 </td> <td style="text-align:right;"> -0.00 +0 </td> <td style="text-align:right;"> -0.0 +2 </td> <td style="text-align:right;"> -0.00 +0 +</td> +</tr> +<tr> +<td style="text-align:left;"> +consFlag </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> -100.00 +1.00 </td> -<td style="text-align:left;"> -â–‡â–â–â–â– +<td style="text-align:right;"> +8 </td> -</tr> -<tr> -<td style="text-align:left;"> -WIND_TURBINE_COUNT +<td style="text-align:right;"> +8 </td> <td style="text-align:right;"> -5546 +0 </td> <td style="text-align:right;"> -0.92 +1 </td> <td style="text-align:right;"> -0.00 +0 +</td> +</tr> +<tr> +<td style="text-align:left;"> +emissionsFlag </td> <td style="text-align:right;"> -0.02 +0 </td> <td style="text-align:right;"> --1.00 +1.00 </td> <td style="text-align:right;"> -0.0 +9 </td> <td style="text-align:right;"> -0.00 +10 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> -1.00 +2 </td> -<td style="text-align:left;"> -â–â–â–‡â–â– +<td style="text-align:right;"> +0 </td> </tr> <tr> <td style="text-align:left;"> -TOTAL_FLOOR_AREA +floorFlag </td> 
<td style="text-align:right;"> 0 @@ -4706,646 +5064,543 @@ TOTAL_FLOOR_AREA 1.00 </td> <td style="text-align:right;"> -73.05 +5 </td> <td style="text-align:right;"> -34.86 +5 </td> <td style="text-align:right;"> -5.85 +0 </td> <td style="text-align:right;"> -49.0 +1 </td> <td style="text-align:right;"> -69.00 +0 +</td> +</tr> +</tbody> +</table> +<p><strong>Variable type: Date</strong></p> +<table> +<thead> +<tr> +<th style="text-align:left;"> +skim_variable +</th> +<th style="text-align:right;"> +n_missing +</th> +<th style="text-align:right;"> +complete_rate +</th> +<th style="text-align:left;"> +min +</th> +<th style="text-align:left;"> +max +</th> +<th style="text-align:left;"> +median +</th> +<th style="text-align:right;"> +n_unique +</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align:left;"> +LODGEMENT_DATE </td> <td style="text-align:right;"> -87 +0 </td> <td style="text-align:right;"> -1353.68 +1 </td> <td style="text-align:left;"> -â–‡â–â–â–â– +2008-10-01 </td> -</tr> -</tbody> -</table> -<p>This leaves us with a total of 71,502 properties.</p> -<pre class="r"><code>of <- path.expand("~/data/EW_epc/domestic-E06000045-Southampton/finalClean.csv") -data.table::fwrite(finalEPCDT, file = of) - -message("Gziping ", of)</code></pre> -<pre><code>## Gziping /Users/ben/data/EW_epc/domestic-E06000045-Southampton/finalClean.csv</code></pre> -<pre class="r"><code># Gzip it -# in case it fails (it will on windows - you will be left with a .csv file) -try(system( paste0("gzip -f '", of,"'"))) # include ' or it breaks on spaces -message("Gzipped ", of)</code></pre> -<pre><code>## Gzipped /Users/ben/data/EW_epc/domestic-E06000045-Southampton/finalClean.csv</code></pre> -</div> -</div> -<div id="check-missing-epc-rates" class="section level1"> -<h1><span class="header-section-number">4</span> Check ‘missing’ EPC rates</h1> -<p>We know that we do not have EPC records for every dwelling. But how many are we missing? 
We will check this at MSOA level as it allows us to link to other MSOA level datasets that tell us how many households, dwellings or energy meters to expect. Arguably it would be better to do this at LSOA level but…</p> -<p>First we’ll use the BEIS 2018 MSOA level annual electricity data to estimate the number of meters (not properties) - some addresses can have 2 meters (e.g. standard & economy 7). This is more useful than the number of gas meters since not all dwellings have mains gas but all have an electricity meter.</p> -<pre class="r"><code>beisElecDT <- data.table::fread("~/data/beis/MSOA_DOM_ELEC_csv/MSOA_ELEC_2018.csv") -sotonElecDT <- beisElecDT[LAName %like% "Southampton", .(nElecMeters = METERS, - beisElecMWh = KWH/1000, - MSOACode, LAName) - ] - - -beisGasDT <- data.table::fread("~/data/beis/MSOA_DOM_GAS_csv/MSOA_GAS_2018.csv") -sotonGasDT <- beisGasDT[LAName %like% "Southampton", .(nGasMeters = METERS, - beisGasMWh = KWH/1000, - MSOACode)] - -setkey(sotonElecDT, MSOACode) -setkey(sotonGasDT, MSOACode) -sotonEnergyDT <- sotonGasDT[sotonElecDT] -sotonEnergyDT[, beisEnergyMWh := beisElecMWh + beisGasMWh] -#head(sotonEnergyDT)</code></pre> -<p>Next we’ll check for the number of households reported by the 2011 Census.</p> -<blockquote> -<p>would be better to use dwellings but this gives us tenure</p> -</blockquote> -<pre class="r"><code>#censusDT <- data.table::fread(path.expand("~/data/")) -# IMD ---- -deprivationDT <- data.table::fread(path.expand("~/data/census2011/2011_MSOA_deprivation.csv")) -deprivationDT[, totalHouseholds := `Household Deprivation: All categories: Classification of household deprivation; measures: Value`] -deprivationDT[, MSOACode := `geography code`] -setkey(deprivationDT, MSOACode) -setkey(sotonElecDT, MSOACode) -# link LA name from Soton elec for now -sotonDep_DT <- deprivationDT[sotonElecDT[, .(MSOACode, LAName)]] -sotonDep_DT[, nHHs_deprivation := `Household Deprivation: All categories: Classification of household deprivation; 
measures: Value`] - -#sotonDep_DT[, .(nHouseholds = sum(totalHouseholds)), keyby = .(LAName)] - -# census tenure ---- -sotonTenureDT <- data.table::fread(path.expand("~/data/census2011/2011_MSOA_householdTenure_Soton.csv")) - -sotonTenureDT[, census2011_socialRent := `Tenure: Social rented; measures: Value`] -sotonTenureDT[, census2011_privateRent := `Tenure: Private rented; measures: Value`] -sotonTenureDT[, census2011_ownerOccupy := `Tenure: Owned; measures: Value`] -sotonTenureDT[, census2011_other := `Tenure: Living rent free; measures: Value`] -sotonTenureDT[, MSOACode := `geography code`] - -sotonTenureDT[, hhCheck := census2011_socialRent + census2011_privateRent + census2011_ownerOccupy + census2011_other] -sotonTenureDT[, nHHs_tenure := `Tenure: All households; measures: Value`] - -# summary(sotonTenureDT[, .(hhCheck, nHHs_tenure)]) -# might not quite match due to cell perturbation etc? - -# join em ---- -setkey(sotonDep_DT, MSOACode) -setkey(sotonTenureDT, MSOACode) - -sotonCensus2011_DT <- sotonTenureDT[sotonDep_DT] - -t <- sotonCensus2011_DT[, .(sum_Deprivation = sum(nHHs_deprivation), - sum_Tenure = sum(nHHs_tenure)), keyby = .(LAName)] -kableExtra::kable(t, caption = "Census derived household counts")</code></pre> -<table> -<caption> -<span id="tab:checkCensus">Table 4.1: </span>Census derived household counts -</caption> -<thead> -<tr> -<th style="text-align:left;"> -LAName -</th> -<th style="text-align:right;"> -sum_Deprivation -</th> -<th style="text-align:right;"> -sum_Tenure -</th> -</tr> -</thead> -<tbody> -<tr> <td style="text-align:left;"> -Southampton +2020-06-30 </td> -<td style="text-align:right;"> -98254 +<td style="text-align:left;"> +2014-10-22 </td> <td style="text-align:right;"> -98254 +4132 </td> </tr> -</tbody> -</table> -<p>That’s lower (as expected) but doesn’t allow for dwellings that were empty on census night.</p> -<pre class="r"><code># Postcodes don't help - no count of addresses in the data (there used to be??) 
-# but we can use it to check which Soton postcodes are missing from the EPC file -soPostcodesDT <- data.table::fread(path.expand("~/data/UK_postcodes/NSPL_AUG_2020_UK/Data/multi_csv/NSPL_AUG_2020_UK_SO.csv")) - -soPostcodesDT <- soPostcodesDT[is.na(doterm)] # keep current - -sotonPostcodesDT <- soPostcodesDT[laua == "E06000045"] # keep Southampton City - -sotonPostcodesReducedDT <- sotonPostcodesDT[, .(pcd, pcd2, pcds, laua, msoa11, lsoa11)] - -sotonPostcodesReducedDT[, c("pc_chunk1","pc_chunk2" ) := tstrsplit(pcds, - split = " " - ) - ] -sotonPostcodesReducedDT[, .(nEPCs = .N), keyby = .(pc_chunk1)]</code></pre> -<pre><code>## pc_chunk1 nEPCs -## 1: SO14 849 -## 2: SO15 1176 -## 3: SO16 1328 -## 4: SO17 443 -## 5: SO18 859 -## 6: SO19 1164</code></pre> -<p>We should not have single digit postcodes in the postcode data - i.e. S01 should not be there (since 1993). Southampton City is unusual in only having <a href="https://en.wikipedia.org/wiki/SO_postcode_area">double digit postcodes</a>.</p> -<pre class="r"><code># EPC -# set up counters -# use final cleaned EPC data -finalEPCDT[, epcIsSocialRent := ifelse(TENURE == "rental (social)", 1, 0)] -finalEPCDT[, epcIsPrivateRent := ifelse(TENURE == "rental (private)", 1, 0)] -finalEPCDT[, epcIsOwnerOcc := ifelse(TENURE == "owner-occupied", 1, 0)] -finalEPCDT[, epcIsUnknownTenure := ifelse(TENURE == "NO DATA!" 
| - TENURE == "" , 1, 0)] -# aggregate EPCs to postcodes -sotonEpcPostcodes_DT <- finalEPCDT[, .(nEPCs = .N, - sumEPC_tCO2 = sum(CO2_EMISSIONS_CURRENT, na.rm = TRUE), - n_epcIsSocialRent = sum(epcIsSocialRent, na.rm = TRUE), - n_epcIsPrivateRent = sum(epcIsPrivateRent, na.rm = TRUE), - n_epcIsOwnerOcc = sum(epcIsOwnerOcc, na.rm = TRUE), - n_epcIsUnknownTenure = sum(epcIsUnknownTenure, na.rm = TRUE), - sumEpcMWh = sum(ENERGY_CONSUMPTION_CURRENT* TOTAL_FLOOR_AREA)/1000), # crucial as ENERGY_CONSUMPTION_CURRENT = kWh/m2 - keyby = .(POSTCODE, LOCAL_AUTHORITY_LABEL)] - -sotonEpcPostcodes_DT[, c("pc_chunk1","pc_chunk2" ) := tstrsplit(POSTCODE, - split = " " - ) - ] -sotonEpcPostcodes_DT[, .(nEPCs = .N), keyby = .(pc_chunk1)]</code></pre> -<pre><code>## pc_chunk1 nEPCs -## 1: SO14 601 -## 2: SO15 960 -## 3: SO16 1244 -## 4: SO17 403 -## 5: SO18 775 -## 6: SO19 1122</code></pre> -<pre class="r"><code># check original EPC data for Soton - which postcodes are covered? -allEPCs_DT[, c("pc_chunk1","pc_chunk2" ) := tstrsplit(POSTCODE, - split = " " - ) - ] -allEPCs_DT[, .(nEPCs = .N), keyby = .(pc_chunk1)]</code></pre> -<pre><code>## pc_chunk1 nEPCs -## 1: SO14 14213 -## 2: SO15 17855 -## 3: SO16 20270 -## 4: SO17 8446 -## 5: SO18 10661 -## 6: SO19 20388</code></pre> -<p>It looks like we have EPCs for each postcode sector which is good.</p> -<pre class="r"><code># match the EPC postcode summaries to the postcode extract -sotonPostcodesReducedDT[, POSTCODE_s := stringr::str_remove(pcds, " ")] -setkey(sotonPostcodesReducedDT, POSTCODE_s) -sotonPostcodesReducedDT[, MSOACode := msoa11] -message("Number of postcodes: ",uniqueN(sotonPostcodesReducedDT$POSTCODE_s))</code></pre> -<pre><code>## Number of postcodes: 5819</code></pre> -<pre class="r"><code>sotonEpcPostcodes_DT[, POSTCODE_s := stringr::str_remove(POSTCODE, " ")] -setkey(sotonEpcPostcodes_DT, POSTCODE_s) -message("Number of postcodes with EPCs: ",uniqueN(sotonEpcPostcodes_DT$POSTCODE_s))</code></pre> -<pre><code>## Number 
of postcodes with EPCs: 5105</code></pre> -<pre class="r"><code>dt <- sotonEpcPostcodes_DT[sotonPostcodesReducedDT] - -# aggregate to MSOA - watch for NAs where no EPCs in a given postcode -sotonEpcMSOA_DT <- dt[, .(nEPCs = sum(nEPCs, na.rm = TRUE), - sumEPC_tCO2 = sum(sumEPC_tCO2, na.rm = TRUE), - n_epcIsSocialRent = sum(n_epcIsSocialRent, na.rm = TRUE), - n_epcIsPrivateRent = sum(n_epcIsPrivateRent, na.rm = TRUE), - n_epcIsOwnerOcc = sum(n_epcIsOwnerOcc, na.rm = TRUE), - n_epcIsUnknownTenure = sum(n_epcIsUnknownTenure, na.rm = TRUE), - sumEpcMWh = sum(sumEpcMWh, na.rm = TRUE) - ), - keyby = .(MSOACode) # change name on the fly for easier matching - ] - -#summary(sotonEpcMSOA_DT)</code></pre> -<p>So we have some postcodes with no EPCs.</p> -<p>Join the estimates together at MSOA level for comparison. There are 32 MSOAs in Southampton.</p> -<pre class="r"><code># 32 LSOAs in Soton -# add deprivation -setkey(sotonEnergyDT, MSOACode) -setkey(sotonCensus2011_DT, MSOACode) -setkey(sotonEpcMSOA_DT, MSOACode) - -sotonMSOA_DT <- sotonCensus2011_DT[sotonEnergyDT] -#names(sotonMSOA_DT) -sotonMSOA_DT <- sotonEpcMSOA_DT[sotonMSOA_DT] -#names(sotonMSOA_DT) - -# add MSOA names from the postcode LUT - -msoaNamesDT <- data.table::as.data.table(readxl::read_xlsx(path.expand("~/data/UK_postcodes/NSPL_AUG_2020_UK/Documents/MSOA (2011) names and codes UK as at 12_12.xlsx"))) -msoaNamesDT[, MSOACode := MSOA11CD] -msoaNamesDT[, MSOAName := MSOA11NM] -setkey(msoaNamesDT, MSOACode) - -sotonMSOA_DT <- msoaNamesDT[sotonMSOA_DT] - -#names(sotonMSOA_DT)</code></pre> -<pre class="r"><code>t <- sotonMSOA_DT[, .(nHouseholds_2011 = sum(nHHs_tenure), - nElecMeters_2018 = sum(nElecMeters), - nEPCs_2020 = sum(nEPCs)), keyby = .(LAName)] - -kableExtra::kable(t, caption = "Comparison of different estimates of the number of dwellings") %>% - kable_styling()</code></pre> -<table class="table" style="margin-left: auto; margin-right: auto;"> -<caption> -<span id="tab:compareEpcEstimates">Table 4.2: 
</span>Comparison of different estimates of the number of dwellings -</caption> -<thead> -<tr> -<th style="text-align:left;"> -LAName -</th> -<th style="text-align:right;"> -nHouseholds_2011 -</th> -<th style="text-align:right;"> -nElecMeters_2018 -</th> -<th style="text-align:right;"> -nEPCs_2020 -</th> -</tr> -</thead> -<tbody> <tr> <td style="text-align:left;"> -Southampton +INSPECTION_DATE </td> <td style="text-align:right;"> -98254 +0 </td> <td style="text-align:right;"> -108333 +1 +</td> +<td style="text-align:left;"> +2007-03-02 +</td> +<td style="text-align:left;"> +2020-06-30 +</td> +<td style="text-align:left;"> +2014-10-14 </td> <td style="text-align:right;"> -71179 +3906 </td> </tr> </tbody> </table> -<pre class="r"><code>nHouseholds_2011f <- sum(sotonMSOA_DT$nHHs_tenure) -nElecMeters_2018f <- sum(sotonMSOA_DT$nElecMeters) -nEPCs_2020f <- sum(sotonMSOA_DT$nEPCs) - -makePC <- function(x,y,r){ - # make a percent of x/y and round it to r decimal places - pc <- round(100*(x/y),r) - return(pc) -}</code></pre> -<p>From this we calculate that number of EPCs we have is:</p> -<ul> -<li>72.4% of Census 2011 households -<ul> -<li>65.7% of the recorded 2018 electricity meters</li> -</ul></li> -</ul> -<p>We can also see that despite having ‘missing’ EPCs, the estimated total EPC-derived energy demand is marginally higher than the BEIS-derived weather corrected energy demand data. 
Given that the BEIS data accounts for all heating, cooking, hot water, lighting and appliance use we would expect the EPC data to be lower <em>even if no EPCs were missing…</em></p> -<pre class="r"><code>sotonMSOA_DT[, dep0_pc := 100*(`Household Deprivation: Household is not deprived in any dimension; measures: Value`/nHHs_deprivation)] -sotonMSOA_DT[, socRent_pc := 100*(census2011_socialRent/nHHs_tenure)] -sotonMSOA_DT[, privRent_pc := 100*(census2011_privateRent/nHHs_tenure)] -sotonMSOA_DT[, ownerOcc_pc := 100*(census2011_ownerOccupy/nHHs_tenure)] - -t <- sotonMSOA_DT[, .(MSOAName, MSOACode, nHHs_tenure,nElecMeters,nEPCs, - dep0_pc, socRent_pc, privRent_pc, ownerOcc_pc,sumEpcMWh, beisEnergyMWh )] - -t[, pc_missingHH := makePC(nEPCs,nHHs_tenure,1)] -t[, pc_missingMeters := makePC(nEPCs,nElecMeters,1)] -t[, pc_energyBEIS := makePC(sumEpcMWh,beisEnergyMWh,1)] - -kt1 <- t - -ggplot2::ggplot(t, aes(x = pc_missingHH, - y = pc_missingMeters, - colour = round(ownerOcc_pc))) + - geom_abline(alpha = 0.2, slope=1, intercept=0) + - geom_point() + - scale_color_continuous(name = "% owner occupiers \n(Census 2011)", high = "red", low = "green") + - #theme(legend.position = "bottom") + - labs(x = "EPCs 2020 as % of Census 2011 households", - y = "EPCs 2020 as % of electricity meters 2018", - caption = "x = y line included for clarity")</code></pre> -<div class="figure"><span id="fig:missingEPCbyMSOA"></span> -<img src="" alt="% 'missing' rates comparison" width="672" /> -<p class="caption"> -Figure 4.1: % ‘missing’ rates comparison -</p> -</div> -<pre class="r"><code>outlierMSOA <- t[pc_missingHH > 100]</code></pre> -<p>Figure <a href="#fig:missingEPCbyMSOA">4.1</a> (see Table <a href="#tab:bigMSOATable">8.1</a> below for details) suggests that rates vary considerably by MSOA but are relatively consistent across the two baseline ‘truth’ estimates with the exception of E02003577 which appears to have many more EPCs than Census 2011 households. 
It is worth noting that <a href="https://www.localhealth.org.uk/#c=report&chapter=c01&report=r01&selgeo1=msoa_2011.E02003577&selgeo2=eng.E92000001">this MSOA</a> covers the city centre and dock areas which have had substantial new build since 2011 and so may have households inhabiting dwellings that did not exist at Census 2011. This is also supported by the considerably higher EPC derived energy demand data compared to BEIS’s 2018 data - although it suggests the dwellings are either very new (since 2018) or are yet to be occupied.</p> -<p>As we would expect those MSOAs with the lowest EPC coverage on both baseline measures tend to have higher proportions of owner occupiers.</p> -<p>We can use the same approach to compare estimates of total energy demand at the MSOA level. To do this we compare:</p> -<ul> -<li>estimated total energy demand in MWh/year derived from the EPC estimates. This energy only relates to <code>current primary energy</code> (space heating, hot water and lighting) and of course also suffers from missing EPCs (see above)</li> -<li>observed electricity and gas demand collated by BEIS for their sub-national statistical series. 
This applies to all domestic energy demand but the most recent data is for 2018 so will suffer from the absence of dwellings that are present in the most recent EPC data (see above).</li> -</ul> -<p>We should therefore not expect the values to match but we might reasonably expect a correlation.</p> -<pre class="r"><code>ggplot2::ggplot(t, aes(x = sumEpcMWh, - y = beisEnergyMWh, - colour = round(ownerOcc_pc))) + - geom_abline(alpha = 0.2, slope=1, intercept=0) + - geom_point() + - scale_color_continuous(name = "% owner occupiers \n(Census 2011)", high = "red", low = "green") + - #theme(legend.position = "bottom") + - labs(x = "EPC 2020 derived total MWh/year", - y = "BEIS 2018 derived total MWh/year", - caption = "x = y line included for clarity")</code></pre> -<div class="figure"><span id="fig:energyMSOAPlot"></span> -<img src="" alt="Energy demand comparison" width="672" /> -<p class="caption"> -Figure 4.2: Energy demand comparison -</p> -</div> -<pre class="r"><code>outlier <- t[sumEpcMWh > 70000]</code></pre> -<p>Figure <a href="#fig:energyMSOAPlot">4.2</a> shows that both of these are true. MSOAs with a high proportion of owner occupiers (and therefore more likely to have missing EPCs) tend to have higher observed energy demand than the EOC data suggests - they are above the reference line. MSOAs with a lower proportion of owner occupiers (and therefore more likely to have more complete EPC coverage) tend to be on or below the line. As before we have the same notable outlier (E02003577) and for the same reasons… In this case this produces a much higher energy demand estimate than the BEIS 2018 data records.</p> -</div> -<div id="check-beis-data" class="section level1"> -<h1><span class="header-section-number">5</span> Check BEIS data</h1> -<p>While we’re here we’ll also check the BEIS data. 
Table <a href="#tab:beisDesc">5.1</a> shows the five highest and lowest MSOAs by annual electricity use.</p> -<pre class="r"><code>t1 <- head(sotonMSOA_DT[, .(MSOA11NM, MSOA11CD, beisElecMWh, nElecMeters, - beisGasMWh, nGasMeters)][order(-beisElecMWh)],5) - -kableExtra::kable(t1, caption = "Southampton MSOAs: BEIS 2018 energy data ordered by highest electricity (top 5)") %>% - kable_styling()</code></pre> -<table class="table" style="margin-left: auto; margin-right: auto;"> -<caption> -<span id="tab:beisDesc">Table 5.1: </span>Southampton MSOAs: BEIS 2018 energy data ordered by highest electricity (top 5) -</caption> +<p><strong>Variable type: numeric</strong></p> +<table> <thead> <tr> <th style="text-align:left;"> -MSOA11NM +skim_variable </th> -<th style="text-align:left;"> -MSOA11CD +<th style="text-align:right;"> +n_missing </th> <th style="text-align:right;"> -beisElecMWh +complete_rate </th> <th style="text-align:right;"> -nElecMeters +mean </th> <th style="text-align:right;"> -beisGasMWh +sd </th> <th style="text-align:right;"> -nGasMeters +p0 +</th> +<th style="text-align:right;"> +p25 +</th> +<th style="text-align:right;"> +p50 +</th> +<th style="text-align:right;"> +p75 +</th> +<th style="text-align:right;"> +p100 +</th> +<th style="text-align:left;"> +hist </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> -Southampton 029 +ENVIRONMENT_IMPACT_CURRENT +</td> +<td style="text-align:right;"> +0 +</td> +<td style="text-align:right;"> +1.00 +</td> +<td style="text-align:right;"> +62.51 +</td> +<td style="text-align:right;"> +15.72 +</td> +<td style="text-align:right;"> +1.00 +</td> +<td style="text-align:right;"> +52.0 +</td> +<td style="text-align:right;"> +63.00 +</td> +<td style="text-align:right;"> +73 +</td> +<td style="text-align:right;"> +100.00 +</td> +<td style="text-align:left;"> +▁▂▆▇▂ +</td> +</tr> +<tr> +<td style="text-align:left;"> +ENERGY_CONSUMPTION_CURRENT </td> -<td style="text-align:left;"> -E02003577 +<td 
style="text-align:right;"> +0 </td> <td style="text-align:right;"> -27352.70 +1.00 </td> <td style="text-align:right;"> -6734 +263.23 </td> <td style="text-align:right;"> -20108.63 +140.47 </td> <td style="text-align:right;"> -2420 +4.00 +</td> +<td style="text-align:right;"> +174.0 +</td> +<td style="text-align:right;"> +233.00 +</td> +<td style="text-align:right;"> +327 +</td> +<td style="text-align:right;"> +1597.00 +</td> +<td style="text-align:left;"> +▇▂▁▁▁ </td> </tr> <tr> <td style="text-align:left;"> -Southampton 014 +CO2_EMISSIONS_CURRENT </td> -<td style="text-align:left;"> -E02003562 +<td style="text-align:right;"> +0 </td> <td style="text-align:right;"> -14757.18 +1.00 </td> <td style="text-align:right;"> -3921 +3.17 </td> <td style="text-align:right;"> -36532.48 +1.94 </td> <td style="text-align:right;"> -2983 +0.10 +</td> +<td style="text-align:right;"> +1.8 +</td> +<td style="text-align:right;"> +2.85 +</td> +<td style="text-align:right;"> +4 +</td> +<td style="text-align:right;"> +77.00 +</td> +<td style="text-align:left;"> +▇▁▁▁▁ </td> </tr> <tr> <td style="text-align:left;"> -Southampton 022 +PHOTO_SUPPLY </td> -<td style="text-align:left;"> -E02003570 +<td style="text-align:right;"> +38495 </td> <td style="text-align:right;"> -14719.37 +0.46 </td> <td style="text-align:right;"> -4142 +0.59 </td> <td style="text-align:right;"> -34730.60 +5.11 </td> <td style="text-align:right;"> -3083 +0.00 +</td> +<td style="text-align:right;"> +0.0 +</td> +<td style="text-align:right;"> +0.00 +</td> +<td style="text-align:right;"> +0 +</td> +<td style="text-align:right;"> +100.00 +</td> +<td style="text-align:left;"> +▇▁▁▁▁ </td> </tr> <tr> <td style="text-align:left;"> -Southampton 031 +WIND_TURBINE_COUNT </td> -<td style="text-align:left;"> -E02003579 +<td style="text-align:right;"> +5546 </td> <td style="text-align:right;"> -13860.94 +0.92 </td> <td style="text-align:right;"> -4460 +0.00 </td> <td style="text-align:right;"> -34052.12 +0.02 
</td> <td style="text-align:right;"> -3068 +-1.00 +</td> +<td style="text-align:right;"> +0.0 +</td> +<td style="text-align:right;"> +0.00 +</td> +<td style="text-align:right;"> +0 +</td> +<td style="text-align:right;"> +1.00 +</td> +<td style="text-align:left;"> +▁▁▇▁▁ </td> </tr> <tr> <td style="text-align:left;"> -Southampton 021 +TOTAL_FLOOR_AREA </td> -<td style="text-align:left;"> -E02003569 +<td style="text-align:right;"> +0 </td> <td style="text-align:right;"> -13719.22 +1.00 </td> <td style="text-align:right;"> -3999 +73.05 </td> <td style="text-align:right;"> -27661.45 +34.86 </td> <td style="text-align:right;"> -2722 +5.85 +</td> +<td style="text-align:right;"> +49.0 +</td> +<td style="text-align:right;"> +69.00 +</td> +<td style="text-align:right;"> +87 +</td> +<td style="text-align:right;"> +1353.68 +</td> +<td style="text-align:left;"> +▇▁▁▁▁ </td> </tr> -</tbody> -</table> -<pre class="r"><code>t2 <- tail(sotonMSOA_DT[, .(MSOA11NM, MSOA11CD, beisElecMWh, nElecMeters, - beisGasMWh, nGasMeters)][order(-beisElecMWh)],5) - -kableExtra::kable(t2, caption = "Southampton MSOAs: BEIS 2018 energy data ordered by lowest electricity (bottom 5)") %>% - kable_styling()</code></pre> -<table class="table" style="margin-left: auto; margin-right: auto;"> -<caption> -<span id="tab:beisDesc">Table 5.1: </span>Southampton MSOAs: BEIS 2018 energy data ordered by lowest electricity (bottom 5) -</caption> -<thead> -<tr> -<th style="text-align:left;"> -MSOA11NM -</th> -<th style="text-align:left;"> -MSOA11CD -</th> -<th style="text-align:right;"> -beisElecMWh -</th> -<th style="text-align:right;"> -nElecMeters -</th> -<th style="text-align:right;"> -beisGasMWh -</th> -<th style="text-align:right;"> -nGasMeters -</th> -</tr> -</thead> -<tbody> <tr> <td style="text-align:left;"> -Southampton 024 +epcIsSocialRent </td> -<td style="text-align:left;"> -E02003572 +<td style="text-align:right;"> +0 </td> <td style="text-align:right;"> -9347.893 +1.00 </td> <td 
style="text-align:right;"> -2597 +0.21 </td> <td style="text-align:right;"> -30332.49 +0.40 </td> <td style="text-align:right;"> -2381 +0.00 +</td> +<td style="text-align:right;"> +0.0 +</td> +<td style="text-align:right;"> +0.00 +</td> +<td style="text-align:right;"> +0 +</td> +<td style="text-align:right;"> +1.00 +</td> +<td style="text-align:left;"> +▇▁▁▁▂ </td> </tr> <tr> <td style="text-align:left;"> -Southampton 018 +epcIsPrivateRent </td> -<td style="text-align:left;"> -E02003566 +<td style="text-align:right;"> +0 </td> <td style="text-align:right;"> -9221.544 +1.00 </td> <td style="text-align:right;"> -2831 +0.27 </td> <td style="text-align:right;"> -26826.22 +0.44 </td> <td style="text-align:right;"> -2607 +0.00 +</td> +<td style="text-align:right;"> +0.0 +</td> +<td style="text-align:right;"> +0.00 +</td> +<td style="text-align:right;"> +1 +</td> +<td style="text-align:right;"> +1.00 +</td> +<td style="text-align:left;"> +▇▁▁▁▃ </td> </tr> <tr> <td style="text-align:left;"> -Southampton 008 +epcIsOwnerOcc </td> -<td style="text-align:left;"> -E02003556 +<td style="text-align:right;"> +0 </td> <td style="text-align:right;"> -9199.673 +1.00 </td> <td style="text-align:right;"> -2589 +0.41 </td> <td style="text-align:right;"> -26412.36 +0.49 </td> <td style="text-align:right;"> -2295 +0.00 +</td> +<td style="text-align:right;"> +0.0 +</td> +<td style="text-align:right;"> +0.00 +</td> +<td style="text-align:right;"> +1 +</td> +<td style="text-align:right;"> +1.00 </td> -</tr> -<tr> <td style="text-align:left;"> -Southampton 003 +▇▁▁▁▆ </td> +</tr> +<tr> <td style="text-align:left;"> -E02003551 +epcIsUnknownTenure </td> <td style="text-align:right;"> -8957.742 +0 </td> <td style="text-align:right;"> -2446 +1.00 </td> <td style="text-align:right;"> -17358.87 +0.04 </td> <td style="text-align:right;"> -1649 +0.19 </td> -</tr> -<tr> -<td style="text-align:left;"> -Southampton 005 -</td> -<td style="text-align:left;"> -E02003553 +<td 
style="text-align:right;"> +0.00 </td> <td style="text-align:right;"> -8479.993 +0.0 </td> <td style="text-align:right;"> -2464 +0.00 </td> <td style="text-align:right;"> -24996.91 +0 </td> <td style="text-align:right;"> -2303 +1.00 +</td> +<td style="text-align:left;"> +â–‡â–â–â–â– </td> </tr> </tbody> </table> +<p>This leaves us with a total of 71,502 properties.</p> +<pre class="r"><code>library(stringr) +finalEPCDT[, POSTCODE_s := stringr::str_remove_all(POSTCODE, " ")] +sotonPostcodesReducedDT[, POSTCODE_s := stringr::str_remove_all(pcds, " ")] +setkey(finalEPCDT, POSTCODE_s) +setkey(sotonPostcodesReducedDT, POSTCODE_s) +dt <- sotonPostcodesReducedDT[finalEPCDT] +dt[, MSOACode := msoa11] + +setkey(dt, MSOACode) +setkey(sotonCensus2011_DT, MSOACode) + +dt <- sotonCensus2011_DT[dt] + +of <- path.expand("~/data/EW_epc/domestic-E06000045-Southampton/EPCs_liveFinalClean.csv") +data.table::fwrite(dt, file = of) + +message("Gziping ", of)</code></pre> +<pre><code>## Gziping /Users/ben/data/EW_epc/domestic-E06000045-Southampton/EPCs_liveFinalClean.csv</code></pre> +<pre class="r"><code># Gzip it +# in case it fails (it will on windows - you will be left with a .csv file) +try(system( paste0("gzip -f '", of,"'"))) # include ' or it breaks on spaces +message("Gzipped ", of)</code></pre> +<pre><code>## Gzipped /Users/ben/data/EW_epc/domestic-E06000045-Southampton/EPCs_liveFinalClean.csv</code></pre> +<p>NB: this failed to match an EPC postcode to an MSOA for 72 EPCs The table below shows which postcodes these were by date.</p> +<pre class="r"><code>dt[is.na(MSOACode), .(nEPCs = .N), keyby = .(POSTCODE_s, TENURE, INSPECTION_DATE)]</code></pre> +<pre><code>## POSTCODE_s TENURE INSPECTION_DATE nEPCs +## 1: SO156GB unknown 2015-04-08 3 +## 2: SO156GB unknown 2015-06-09 1 +## 3: SO156GB unknown 2015-07-06 24 +## 4: SO156GB unknown 2015-08-04 12 +## 5: SO156GB unknown 2015-08-14 9 +## 6: SO156GB unknown 2015-08-19 1 +## 7: SO160AL 2009-02-17 4 +## 8: SO160AL rental (private) 
2019-11-20 2 +## 9: SO162AJ unknown 2018-03-22 2 +## 10: SO167HE owner-occupied 2009-11-30 1 +## 11: SO167HE owner-occupied 2017-10-04 1 +## 12: SO168AD 2011-03-18 1 +## 13: SO168AD owner-occupied 2008-10-01 1 +## 14: SO168AD owner-occupied 2009-09-29 1 +## 15: SO168AD owner-occupied 2019-04-08 1 +## 16: SO168AD owner-occupied 2020-06-17 1 +## 17: SO181HS rental (private) 2010-04-27 1 +## 18: SO185BR owner-occupied 2018-06-05 1 +## 19: SO185BS owner-occupied 2012-02-15 1 +## 20: SO185BS owner-occupied 2018-03-14 1 +## 21: SO196AQ unknown 2014-11-05 3 +## POSTCODE_s TENURE INSPECTION_DATE nEPCs</code></pre> </div> -<div id="save-msoa-aggregates-for-re-use" class="section level1"> -<h1><span class="header-section-number">6</span> Save MSOA aggregates for re-use</h1> +<div id="summarise-and-save-msoa-aggregates-for-re-use" class="section level1"> +<h1><span class="header-section-number">5</span> Summarise and save MSOA aggregates for re-use</h1> <p>Finally we save the MSOA table into the repo data directory for future use. We don’t usually advocate keeping data in a git repo but this is small, aggregated and <a href="https://en.wikipedia.org/wiki/Mostly_Harmless">mostly harmless</a>.</p> <pre class="r"><code>of <- here::here("data", "sotonMSOAdata.csv") @@ -5354,7 +5609,7 @@ message("Saved ", nrow(sotonMSOA_DT), " rows of data.")</cod <pre><code>## Saved 32 rows of data.</code></pre> </div> <div id="r-packages-used" class="section level1"> -<h1><span class="header-section-number">7</span> R packages used</h1> +<h1><span class="header-section-number">6</span> R packages used</h1> <ul> <li>rmarkdown <span class="citation">(Allaire et al. 
2018)</span></li> <li>bookdown <span class="citation">(Xie 2016a)</span></li> @@ -5366,14 +5621,14 @@ message("Saved ", nrow(sotonMSOA_DT), " rows of data.")</cod </ul> </div> <div id="annex" class="section level1"> -<h1><span class="header-section-number">8</span> Annex</h1> +<h1><span class="header-section-number">7</span> Annex</h1> <div id="tables" class="section level2"> -<h2><span class="header-section-number">8.1</span> Tables</h2> +<h2><span class="header-section-number">7.1</span> Tables</h2> <pre class="r"><code>kableExtra::kable(kt1[order(-pc_missingHH)], digits = 2, caption = "EPC records as a % of n census households and n meters per MSOA") %>% kable_styling()</code></pre> <table class="table" style="margin-left: auto; margin-right: auto;"> <caption> -<span id="tab:bigMSOATable">Table 8.1: </span>EPC records as a % of n census households and n meters per MSOA +<span id="tab:bigMSOATable">Table 7.1: </span>EPC records as a % of n census households and n meters per MSOA </caption> <thead> <tr> @@ -5436,7 +5691,7 @@ E02003577 6734 </td> <td style="text-align:right;"> -5879 +5917 </td> <td style="text-align:right;"> 37.92 @@ -5451,19 +5706,19 @@ E02003577 24.67 </td> <td style="text-align:right;"> -77245.24 +77383.23 </td> <td style="text-align:right;"> 47461.33 </td> <td style="text-align:right;"> -119.8 +120.6 </td> <td style="text-align:right;"> -87.3 +87.9 </td> <td style="text-align:right;"> -162.8 +163.0 </td> </tr> <tr> @@ -5512,90 +5767,90 @@ E02003571 </tr> <tr> <td style="text-align:left;"> -Southampton 022 +Southampton 017 </td> <td style="text-align:left;"> -E02003570 +E02003565 </td> <td style="text-align:right;"> -3635 +2563 </td> <td style="text-align:right;"> -4142 +2840 </td> <td style="text-align:right;"> -3424 +2427 </td> <td style="text-align:right;"> -25.56 +48.81 </td> <td style="text-align:right;"> -29.82 +11.63 </td> <td style="text-align:right;"> -45.69 +57.24 </td> <td style="text-align:right;"> -22.59 +28.95 </td> <td 
style="text-align:right;"> -58326.68 +42637.78 </td> <td style="text-align:right;"> -49449.97 +35191.63 </td> <td style="text-align:right;"> -94.2 +94.7 </td> <td style="text-align:right;"> -82.7 +85.5 </td> <td style="text-align:right;"> -118.0 +121.2 </td> </tr> <tr> <td style="text-align:left;"> -Southampton 017 +Southampton 022 </td> <td style="text-align:left;"> -E02003565 +E02003570 </td> <td style="text-align:right;"> -2563 +3635 </td> <td style="text-align:right;"> -2840 +4142 </td> <td style="text-align:right;"> -2397 +3429 </td> <td style="text-align:right;"> -48.81 +25.56 </td> <td style="text-align:right;"> -11.63 +29.82 </td> <td style="text-align:right;"> -57.24 +45.69 </td> <td style="text-align:right;"> -28.95 +22.59 </td> <td style="text-align:right;"> -42543.37 +58338.87 </td> <td style="text-align:right;"> -35191.63 +49449.97 </td> <td style="text-align:right;"> -93.5 +94.3 </td> <td style="text-align:right;"> -84.4 +82.8 </td> <td style="text-align:right;"> -120.9 +118.0 </td> </tr> <tr> @@ -5612,7 +5867,7 @@ E02003579 4460 </td> <td style="text-align:right;"> -3112 +3137 </td> <td style="text-align:right;"> 44.92 @@ -5627,19 +5882,19 @@ E02003579 63.09 </td> <td style="text-align:right;"> -46140.81 +46309.49 </td> <td style="text-align:right;"> 47913.06 </td> <td style="text-align:right;"> -92.7 +93.4 </td> <td style="text-align:right;"> -69.8 +70.3 </td> <td style="text-align:right;"> -96.3 +96.7 </td> </tr> <tr> @@ -5688,6 +5943,50 @@ E02003561 </tr> <tr> <td style="text-align:left;"> +Southampton 021 +</td> +<td style="text-align:left;"> +E02003569 +</td> +<td style="text-align:right;"> +3527 +</td> +<td style="text-align:right;"> +3999 +</td> +<td style="text-align:right;"> +2754 +</td> +<td style="text-align:right;"> +40.71 +</td> +<td style="text-align:right;"> +15.00 +</td> +<td style="text-align:right;"> +38.28 +</td> +<td style="text-align:right;"> +44.32 +</td> +<td style="text-align:right;"> +47718.63 +</td> +<td 
style="text-align:right;"> +41380.67 +</td> +<td style="text-align:right;"> +78.1 +</td> +<td style="text-align:right;"> +68.9 +</td> +<td style="text-align:right;"> +115.3 +</td> +</tr> +<tr> +<td style="text-align:left;"> Southampton 009 </td> <td style="text-align:left;"> @@ -5744,7 +6043,7 @@ E02003568 3900 </td> <td style="text-align:right;"> -2954 +2959 </td> <td style="text-align:right;"> 50.08 @@ -5765,10 +6064,10 @@ E02003568 47024.09 </td> <td style="text-align:right;"> -77.3 +77.5 </td> <td style="text-align:right;"> -75.7 +75.9 </td> <td style="text-align:right;"> 113.7 @@ -5788,7 +6087,7 @@ E02003558 3222 </td> <td style="text-align:right;"> -2216 +2223 </td> <td style="text-align:right;"> 33.96 @@ -5803,16 +6102,16 @@ E02003558 39.81 </td> <td style="text-align:right;"> -38568.39 +38565.55 </td> <td style="text-align:right;"> 34421.99 </td> <td style="text-align:right;"> -75.8 +76.0 </td> <td style="text-align:right;"> -68.8 +69.0 </td> <td style="text-align:right;"> 112.0 @@ -5820,50 +6119,6 @@ E02003558 </tr> <tr> <td style="text-align:left;"> -Southampton 021 -</td> -<td style="text-align:left;"> -E02003569 -</td> -<td style="text-align:right;"> -3527 -</td> -<td style="text-align:right;"> -3999 -</td> -<td style="text-align:right;"> -2671 -</td> -<td style="text-align:right;"> -40.71 -</td> -<td style="text-align:right;"> -15.00 -</td> -<td style="text-align:right;"> -38.28 -</td> -<td style="text-align:right;"> -44.32 -</td> -<td style="text-align:right;"> -46831.10 -</td> -<td style="text-align:right;"> -41380.67 -</td> -<td style="text-align:right;"> -75.7 -</td> -<td style="text-align:right;"> -66.8 -</td> -<td style="text-align:right;"> -113.2 -</td> -</tr> -<tr> -<td style="text-align:left;"> Southampton 015 </td> <td style="text-align:left;"> @@ -5876,7 +6131,7 @@ E02003563 3818 </td> <td style="text-align:right;"> -2545 +2553 </td> <td style="text-align:right;"> 37.81 @@ -5891,16 +6146,16 @@ E02003563 51.79 </td> <td 
style="text-align:right;"> -46490.24 +46524.57 </td> <td style="text-align:right;"> 39920.85 </td> <td style="text-align:right;"> -73.1 +73.3 </td> <td style="text-align:right;"> -66.7 +66.9 </td> <td style="text-align:right;"> 116.5 @@ -5964,7 +6219,7 @@ E02003555 3763 </td> <td style="text-align:right;"> -2258 +2261 </td> <td style="text-align:right;"> 34.59 @@ -5979,19 +6234,19 @@ E02003555 56.15 </td> <td style="text-align:right;"> -35186.09 +35181.90 </td> <td style="text-align:right;"> 40416.83 </td> <td style="text-align:right;"> -71.9 +72.0 </td> <td style="text-align:right;"> -60.0 +60.1 </td> <td style="text-align:right;"> -87.1 +87.0 </td> </tr> <tr> @@ -6008,7 +6263,7 @@ E02003553 2464 </td> <td style="text-align:right;"> -1686 +1687 </td> <td style="text-align:right;"> 39.01 @@ -6023,19 +6278,19 @@ E02003553 32.00 </td> <td style="text-align:right;"> -31930.49 +31991.63 </td> <td style="text-align:right;"> 33476.91 </td> <td style="text-align:right;"> -70.4 +70.5 </td> <td style="text-align:right;"> -68.4 +68.5 </td> <td style="text-align:right;"> -95.4 +95.6 </td> </tr> <tr> @@ -6052,7 +6307,7 @@ E02003562 3921 </td> <td style="text-align:right;"> -2489 +2513 </td> <td style="text-align:right;"> 45.68 @@ -6073,10 +6328,10 @@ E02003562 51289.66 </td> <td style="text-align:right;"> -68.5 +69.1 </td> <td style="text-align:right;"> -63.5 +64.1 </td> <td style="text-align:right;"> 91.9 @@ -6096,7 +6351,7 @@ E02003580 2825 </td> <td style="text-align:right;"> -1783 +1804 </td> <td style="text-align:right;"> 27.21 @@ -6111,19 +6366,19 @@ E02003580 35.69 </td> <td style="text-align:right;"> -30726.44 +31120.78 </td> <td style="text-align:right;"> 24488.16 </td> <td style="text-align:right;"> -68.1 +68.9 </td> <td style="text-align:right;"> -63.1 +63.9 </td> <td style="text-align:right;"> -125.5 +127.1 </td> </tr> <tr> @@ -6172,90 +6427,90 @@ E02003573 </tr> <tr> <td style="text-align:left;"> -Southampton 012 +Southampton 003 </td> <td 
style="text-align:left;"> -E02003560 +E02003551 </td> <td style="text-align:right;"> -3040 +2256 </td> <td style="text-align:right;"> -3191 +2446 </td> <td style="text-align:right;"> -1952 +1456 </td> <td style="text-align:right;"> -26.97 +33.69 </td> <td style="text-align:right;"> -53.52 +38.96 </td> <td style="text-align:right;"> -8.75 +15.29 </td> <td style="text-align:right;"> -36.12 +42.95 </td> <td style="text-align:right;"> -33862.76 +27395.69 </td> <td style="text-align:right;"> -34252.94 +26316.61 </td> <td style="text-align:right;"> -64.2 +64.5 </td> <td style="text-align:right;"> -61.2 +59.5 </td> <td style="text-align:right;"> -98.9 +104.1 </td> </tr> <tr> <td style="text-align:left;"> -Southampton 003 +Southampton 012 </td> <td style="text-align:left;"> -E02003551 +E02003560 </td> <td style="text-align:right;"> -2256 +3040 </td> <td style="text-align:right;"> -2446 +3191 </td> <td style="text-align:right;"> -1445 +1952 </td> <td style="text-align:right;"> -33.69 +26.97 </td> <td style="text-align:right;"> -38.96 +53.52 </td> <td style="text-align:right;"> -15.29 +8.75 </td> <td style="text-align:right;"> -42.95 +36.12 </td> <td style="text-align:right;"> -27436.58 +33862.76 </td> <td style="text-align:right;"> -26316.61 +34252.94 </td> <td style="text-align:right;"> -64.1 +64.2 </td> <td style="text-align:right;"> -59.1 +61.2 </td> <td style="text-align:right;"> -104.3 +98.9 </td> </tr> <tr> @@ -6272,7 +6527,7 @@ E02003554 2873 </td> <td style="text-align:right;"> -1683 +1684 </td> <td style="text-align:right;"> 46.49 @@ -6287,7 +6542,7 @@ E02003554 63.00 </td> <td style="text-align:right;"> -35573.16 +35570.20 </td> <td style="text-align:right;"> 39712.77 @@ -6348,46 +6603,46 @@ E02003552 </tr> <tr> <td style="text-align:left;"> -Southampton 028 +Southampton 016 </td> <td style="text-align:left;"> -E02003576 +E02003564 </td> <td style="text-align:right;"> -3434 +3474 </td> <td style="text-align:right;"> -3614 +3563 </td> <td style="text-align:right;"> 
-2120 +2164 </td> <td style="text-align:right;"> -38.99 +39.38 </td> <td style="text-align:right;"> -22.83 +22.54 </td> <td style="text-align:right;"> -18.58 +12.09 </td> <td style="text-align:right;"> -56.41 +63.39 </td> <td style="text-align:right;"> -39556.61 +43679.63 </td> <td style="text-align:right;"> -44100.48 +43718.49 </td> <td style="text-align:right;"> -61.7 +62.3 </td> <td style="text-align:right;"> -58.7 +60.7 </td> <td style="text-align:right;"> -89.7 +99.9 </td> </tr> <tr> @@ -6404,7 +6659,7 @@ E02003566 2831 </td> <td style="text-align:right;"> -1600 +1617 </td> <td style="text-align:right;"> 35.21 @@ -6419,63 +6674,63 @@ E02003566 59.42 </td> <td style="text-align:right;"> -27067.98 +27441.65 </td> <td style="text-align:right;"> 36047.76 </td> <td style="text-align:right;"> -61.4 +62.0 </td> <td style="text-align:right;"> -56.5 +57.1 </td> <td style="text-align:right;"> -75.1 +76.1 </td> </tr> <tr> <td style="text-align:left;"> -Southampton 016 +Southampton 028 </td> <td style="text-align:left;"> -E02003564 +E02003576 </td> <td style="text-align:right;"> -3474 +3434 </td> <td style="text-align:right;"> -3563 +3614 </td> <td style="text-align:right;"> -2124 +2121 </td> <td style="text-align:right;"> -39.38 +38.99 </td> <td style="text-align:right;"> -22.54 +22.83 </td> <td style="text-align:right;"> -12.09 +18.58 </td> <td style="text-align:right;"> -63.39 +56.41 </td> <td style="text-align:right;"> -42655.74 +39556.61 </td> <td style="text-align:right;"> -43718.49 +44100.48 </td> <td style="text-align:right;"> -61.1 +61.8 </td> <td style="text-align:right;"> -59.6 +58.7 </td> <td style="text-align:right;"> -97.6 +89.7 </td> </tr> <tr> @@ -6536,7 +6791,7 @@ E02003550 3527 </td> <td style="text-align:right;"> -1923 +1932 </td> <td style="text-align:right;"> 43.10 @@ -6551,19 +6806,19 @@ E02003550 66.08 </td> <td style="text-align:right;"> -36493.87 +36605.55 </td> <td style="text-align:right;"> 41124.17 </td> <td style="text-align:right;"> -59.8 
+60.1 </td> <td style="text-align:right;"> -54.5 +54.8 </td> <td style="text-align:right;"> -88.7 +89.0 </td> </tr> <tr> @@ -6624,7 +6879,7 @@ E02003574 3599 </td> <td style="text-align:right;"> -1971 +1972 </td> <td style="text-align:right;"> 40.77 @@ -6800,7 +7055,7 @@ E02003556 2589 </td> <td style="text-align:right;"> -1321 +1339 </td> <td style="text-align:right;"> 42.57 @@ -6815,19 +7070,19 @@ E02003556 70.42 </td> <td style="text-align:right;"> -27501.24 +28093.93 </td> <td style="text-align:right;"> 35612.03 </td> <td style="text-align:right;"> -53.5 +54.2 </td> <td style="text-align:right;"> -51.0 +51.7 </td> <td style="text-align:right;"> -77.2 +78.9 </td> </tr> </tbody> -- GitLab