diff --git a/itsTheCatsStupid/itsTheCatsStupid.Rmd b/itsTheCatsStupid/itsTheCatsStupid.Rmd
index 83d9dd86deb84e9c54e74394349519f2bf274d11..1bb62e8c61f715a65f11f1c126608f5f449dee65 100644
--- a/itsTheCatsStupid/itsTheCatsStupid.Rmd
+++ b/itsTheCatsStupid/itsTheCatsStupid.Rmd
@@ -82,45 +82,74 @@ nrow(pc_district[!is.na(GOR10NM)])
 # pending further investigation
 
 t <- pc_district[!is.na(GOR10NM), .(nPostcodeDistricts = .N,
-                     sumCats = sum(EstimatedCatPopulation)), keyby=.(GOR10NM)]
+                     sumCats = sum(EstimatedCatPopulation),
+                     sumElecMeters = sum(nElecMeters, na.rm = TRUE)), keyby=.(GOR10NM)]
 
 t[, catsPerDistrict := sumCats/nPostcodeDistricts]
-makeFlexTable(t, cap = "Regions covered")
+t[, catsPerDwelling := sumCats/sumElecMeters]
+makeFlexTable(t, cap = "Regions covered (1 electricity meter assumed to be 1 dwelling)")
+
+message("Total number of electricity meters:", sum(t$sumElecMeters))
 ```
 
 # What do we find?
 
 Well, in some places there seem to be a lot of estimated cats per household...
 
-(We calculated mean cats per household by dividing by the number of electricity meters - probably a reasonable proxy)
+(We calculated mean cats per household by dividing by the number of electricity meters - should be a reasonable proxy)
 
 ```{r maxCats}
 
-t <- head(pc_district[, .(PostcodeDistrict, EstimatedCatPopulation, mean_Cats, nPostcodes, nElecMeters)][order(-mean_Cats)],10)
+t <- head(pc_district[, .(Region = GOR10NM, PostcodeDistrict, EstimatedCatPopulation, mean_Cats, nPostcodes, nElecMeters,nGasMeters)][order(-mean_Cats)],10)
 makeFlexTable(t, cap = "Top 10 postcode districts by number of cats per 'household'")
 ```
 SA63 is in south west [Wales](https://www.google.co.uk/maps/place/Clarbeston+Road+SA63/@51.8852685,-4.9147384,12z/data=!3m1!4b1!4m5!3m4!1s0x4868d5805b12efe5:0xca42ee4bc84a2f77!8m2!3d51.8900045!4d-4.8502065) while LL23 is on the edge of the [Snowdonia National Park](https://www.google.co.uk/maps/place/Bala+LL23/@52.8953768,-3.7752989,11z/data=!3m1!4b1!4m5!3m4!1s0x4865404ae1208f67:0x65a437b997c0dfb2!8m2!3d52.8825403!4d-3.6497989)....
 
-Do these places have some largish catteries but few houses? 8,233 is a lot of estimated cats.
+Do these places have some largish catteries but few houses?
+
+```{r histoCats, fig.cap = "Histogram of mean number of cats per dwelling"}
+ggplot2::ggplot(pc_district, aes(x = mean_Cats)) +
+  geom_histogram() +
+  labs(caption = "Postcode districts")
+
+summary(pc_district$mean_Cats)
+```
 
 ## More dwellings, more cats?
 
-Is there a correlation between estimated total cats and the number of dwellings (electricity meters)?
+Is there a correlation between estimated total cats and the number of dwellings (as measured by the number of electricity meters)?
 
 ```{r testTotalElecMeters}
 ggplot2::ggplot(pc_district[!is.na(GOR10NM)], aes(x = nElecMeters , y = EstimatedCatPopulation, 
                                  colour = GOR10NM)) +
+  scale_color_discrete(name = "UK `Region`") +
   geom_point() +
   geom_smooth()
 ```
 
-## More cats, more gas?
+## More cats, more energy?
+
+Clearly postcode districts with more dwellings will have higher energy use totals. So we need to compare the mean number of cats per dwelling with mean energy use per dwelling.
+
+This will need to accommodate some outliers in terms of mean number of cats as we saw above and potentially also in terms of mean energy.
+
+```{r meanEnergyOutliers}
+pc_district[, total_gas_kWh := ifelse(is.na(total_gas_kWh), 0, total_gas_kWh)]
+pc_district[, total_energy_kWh := total_gas_kWh + total_elec_kWh]
+pc_district[, mean_energy_kWh := total_energy_kWh/nElecMeters]
+
+summary(pc_district$mean_energy_kWh)
+t <- head(pc_district[, .(Region = GOR10NM, PostcodeDistrict, mean_energy_kWh, nPostcodes, nElecMeters,nGasMeters)][order(-mean_energy_kWh)],10)
+makeFlexTable(t, cap = "Top 10 postcode districts by mean energy per 'household'")
+```
 
 Is there a correlation between estimated cat ownership and total gas use?
 
-```{r testTotalGas}
+```{r meanCatsAndMeanEnergy}
+
+
 ggplot2::ggplot(pc_district[!is.na(GOR10NM)], 
-                aes(x = EstimatedCatPopulation, y = total_gas_kWh, colour = GOR10NM)) +
+                aes(x = mean_Cats, y = mean_energy_kWh, colour = GOR10NM)) +
   geom_smooth() +
   geom_point()
 ```
@@ -192,6 +221,15 @@ ggplot2::ggplot(pc_district[!is.na(GOR10NM)], aes(x = mean_Cats, y = mean_energy
   geom_point()
 ```
 
+# Data Annexes
+
+Cats data
+
+```{r skimCats}
+skimr::skim(cats_DT)
+```
+
+
 # R packages used
 
  * bookdown [@bookdown]
diff --git a/itsTheCatsStupid/makeFile.R b/itsTheCatsStupid/makeFile.R
index 162c2221a209f071fc44a24c4aadba006382ad89..fd34613692799e95a32faa30086728c69bbecb76 100644
--- a/itsTheCatsStupid/makeFile.R
+++ b/itsTheCatsStupid/makeFile.R
@@ -37,6 +37,7 @@ pc_district_elec_dt <- postcodes_elec_dt[, .(elec_nPostcodes = .N,
                                            nElecMeters = sum(`Number of meters`, na.rm = TRUE)
                                            ), keyby = .(pcd_district)]
 nrow(pc_district_elec_dt)
+summary(pc_district_elec_dt)
 
 postcodes_gas_dt <- data.table::fread(paste0(dp, "beis/subnationalGas/Experimental_Gas_Postcode_Statistics_2015.csv"))
 postcodes_gas_dt[, pcd_district := data.table::tstrsplit(POSTCODE, " ", keep = c(1))]
@@ -44,6 +45,7 @@ pc_district_gas_dt <- postcodes_gas_dt[, .(gas_nPostcodes = .N,
                                            total_gas_kWh = sum(`Consumption (kWh)`, na.rm = TRUE),
                                            nGasMeters = sum(`Number of meters`, na.rm = TRUE)), keyby = .(pcd_district)]
 nrow(pc_district_gas_dt)
+summary(pc_district_gas_dt)
 
 setkey(pc_district_elec_dt, pcd_district)
 setkey(pc_district_gas_dt, pcd_district)