Skip to content
Snippets Groups Projects
Select Git revision
  • 9defb49d1875684900464f5ac4a608ca222f6ffd
  • master default
2 results

_loadNonPowerData.Rmd

Blame
  • Load data

    NZ Area Unit data

    2013 NZ Census data from NZ Stats at area unit level. For simplicity we use one file per constraint:

    • n people
    • n dependent children
    • fuel source (all counted - may cause confusion as sum to > 100% of households)
    • n rooms

    NB: these files, when downloaded form the NZStats data extractor come with higher levels of aggregation in the tables. These have to be removed by extracting just area unit rows.

    First load area labels as we use these to select the right data rows.

    areasDT <- data.table::fread(sParams$areasTable2013)
    
    auListDT <- areasDT[, .(nMBs = .N), keyby = .(AU2013_code, AU2013_label, REGC2013_label)]
    auListDT <- auListDT[, AU2013_code := as.character(AU2013_code)] # for easier matching
    setkey(auListDT, AU2013_code)
    # fuelSource ----
    fuelf <- paste0(sParams$dataPath, "raw/areaUnits/fuelSource/TABLECODE8100_Data_47b7b3fc-0e40-431f-b313-141de4fb0013.csv")
    outputMessage(paste0("Loading: ", fuelf))
    fuelDT <- data.table::fread(fuelf)
    outputMessage(paste0("N rows loaded: ", dkUtils::tidyNum(nrow(fuelDT))))
    fuelDT <- fuelDT[, AU2013_code := AREA]
    setkey(fuelDT, AU2013_code)
    fuelDT <- fuelDT[auListDT]
    message("N unique area units (fuel data): ", uniqueN(fuelDT$AU2013_code))
    
    # create categories
    # "value.heatSourceWood", "value.heatSourceElectricity", "value.heatSourceGas", "value.heatSourceCoal", "value.heatSourceOther"
    
    fuelDT <- fuelDT[, censusConstraint := "heatSourceOther"] # complex one - note this contains 'None' as well
    fuelDT <- fuelDT[`Fuel type used to heat dwelling` == "Total dwellings, fuel type used to heat dwelling",
                     censusConstraint := "fuel_totalHouseholds"]
    fuelDT <- fuelDT[`Fuel type used to heat dwelling` == "Total dwellings stated",
                     censusConstraint := "fuel_totalStatedHouseholds"]
    fuelDT <- fuelDT[`Fuel type used to heat dwelling` == "Wood",
                     censusConstraint := "heatSourceWood"]
    fuelDT <- fuelDT[`Fuel type used to heat dwelling` %like% "Solar" | # <- what is this?
                       `Fuel type used to heat dwelling` == "Electricity",
                     censusConstraint := "heatSourceElectricity"]
    fuelDT <- fuelDT[`Fuel type used to heat dwelling`  %like% "gas",
                     censusConstraint := "heatSourceGas"]
    fuelDT <- fuelDT[`Fuel type used to heat dwelling` == "Coal",
                     censusConstraint := "heatSourceCoal"]
    table(fuelDT$`Fuel type used to heat dwelling`, fuelDT$censusConstraint)
    
    # convert to wide
    fuelDT <- fuelDT[, count := Value]
    
    fuel2013WDT <- reshape(fuelDT[YEAR == 2013,
                                  .(AU2013_code,AU2013_label,censusConstraint,count)], 
                              idvar = c("AU2013_code", "AU2013_label"),
                              timevar = "censusConstraint",
                              direction = "wide")
    
    setnames(fuel2013WDT, c("count.fuel_totalHouseholds", "count.fuel_totalStatedHouseholds",
                            "count.heatSourceElectricity", "count.heatSourceGas", 
                            "count.heatSourceWood", "count.heatSourceCoal", "count.heatSourceOther"),
             c("fuel_totalHouseholds", "fuel_totalStatedHouseholds",
                            "heatSourceElectricity", "heatSourceGas", 
                            "heatSourceWood", "heatSourceCoal", "heatSourceOther"))
    
    # nKids ----
    kidsf <- paste0(sParams$dataPath, "raw/areaUnits/nKids/TABLECODE8141_Data_e6f03066-7bbf-4ba0-94b0-1821d5a4665a.csv")
    outputMessage(paste0("Loading: ", kidsf))
    kidsDT <- data.table::fread(kidsf)
    outputMessage(paste0("N rows loaded: ", dkUtils::tidyNum(nrow(kidsDT))))
    kidsDT <- kidsDT[, AU2013_code := AREA]
    setkey(kidsDT, AU2013_code)
    kidsDT <- kidsDT[auListDT]
    
    message("N unique area units (kids data): ", uniqueN(kidsDT$AU2013_code))
    
    # > create categories
    # "nKids_0", "nKids_1m"
    kidsDT <- kidsDT[, censusConstraint := "nKids_1m"] # we selected rows with dependent children in the census extractor
    kidsDT <- kidsDT[`Family type by child dependency status` == "Total families",
                     censusConstraint := "nkids_totalFamilies"]
    
    table(kidsDT$`Family type by child dependency status`, kidsDT$censusConstraint)
    
    # > convert to wide
    kidsDT <- kidsDT[, count := Value]
    
    kids2013WDT <- reshape(kidsDT[YEAR == 2013,
                                  .(AU2013_code,AU2013_label,censusConstraint,count)], 
                              idvar = c("AU2013_code", "AU2013_label"),
                              timevar = "censusConstraint",
                              direction = "wide")
    # > calculate n households with 0 kids
    # do this again later when we have the number of _households_ to use as the base
    kids2013WDT <- kids2013WDT[, nKids_0_families := count.nkids_totalFamilies - count.nKids_1m]
    setnames(kids2013WDT, c("count.nkids_totalFamilies", "count.nKids_1m"), 
             c("nkids_totalFamilies", "nKids_1m"))
    
    
    # nPeople ----
    npeoplef <- paste0(sParams$dataPath, "raw/areaUnits/nPeople/TABLECODE8169_Data_bfad6f1a-c9af-4adb-a141-e13a83e175d0.csv")
    outputMessage(paste0("Loading: ", npeoplef))
    npeopleDT <- data.table::fread(npeoplef)
    outputMessage(paste0("N rows loaded: ", dkUtils::tidyNum(nrow(npeopleDT))))
    setkey(npeopleDT, Area) # forgot to get code attached to this one
    npeopleDT <- npeopleDT[, AU2013_label := Area]
    setkey(auListDT,AU2013_label) # set to label instead of code - should still work
    npeopleDT <- npeopleDT[auListDT]
    message("N unique area units (people data): ", uniqueN(npeopleDT$Area))
    
    # > create categories ----
    # "value.nPeople_1", "value.nPeople_2", "value.nPeople_3", "value.nPeople_4m",
    npeopleDT <- npeopleDT[, censusConstraint := "nPeople_4m"] # default (most complex to code)
    npeopleDT <- npeopleDT[`Number of usual residents in household` == "Total households",
                           censusConstraint := "npeople_totalHouseholds"] 
    npeopleDT <- npeopleDT[`Number of usual residents in household` %like% "One",
                           censusConstraint := "nPeople_1"] 
    npeopleDT <- npeopleDT[`Number of usual residents in household` %like% "Two",
                           censusConstraint := "nPeople_2"] 
    npeopleDT <- npeopleDT[`Number of usual residents in household`  %like% "Three",
                           censusConstraint := "nPeople_3"] 
    table(npeopleDT$`Number of usual residents in household`, npeopleDT$censusConstraint)
    
    # convert to wide
    npeopleDT <- npeopleDT[, count := Value]
    
    npeople2013WDT <- reshape(npeopleDT[Year == 2013, # helpful consistency of var names across NZ stats tables
                                  .(AU2013_code,AU2013_label,censusConstraint,count)], 
                              idvar = c("AU2013_code", "AU2013_label"),
                              timevar = "censusConstraint",
                              direction = "wide")
    
    setnames(npeople2013WDT, c("count.nPeople_1", "count.nPeople_2", "count.nPeople_3", "count.nPeople_4m", "count.npeople_totalHouseholds"), 
             c("nPeople_1", "nPeople_2", "nPeople_3", "nPeople_4m", "npeople_totalHouseholds"))
    
    # nRooms ----
    nroomsf <- paste0(sParams$dataPath, "raw/areaUnits/nRooms/TABLECODE8098_Data_62c5ce5c-23cf-44a2-b25e-b287fe9645e7.csv")
    outputMessage(paste0("Loading: ", nroomsf))
    nroomsDT <- data.table::fread(nroomsf)
    outputMessage(paste0("N rows loaded: ", dkUtils::tidyNum(nrow(nroomsDT))))
    nroomsDT <- nroomsDT[, AU2013_code := AREA]
    setkey(nroomsDT, AU2013_code)
    setkey(auListDT,AU2013_code) # set to back to code
    nroomsDT <- nroomsDT[auListDT]
    message("N unique area units (rooms data): ", uniqueN(nroomsDT$AU2013_code))
    
    # > create categories ----
    # "value.nRooms1_4", "value.nRooms5_6", "value.nRooms7m",
    nroomsDT <- nroomsDT[`Number of rooms` != "Not elsewhere included", censusConstraint := "nRooms7m"] # default (most complex to code)
    nroomsDT <- nroomsDT[`Number of rooms` == "Total dwellings stated",
                           censusConstraint := "nrooms_statedtotalHouseholds"] 
    nroomsDT <- nroomsDT[`Number of rooms` == "Total dwellings, number of rooms",
                           censusConstraint := "nrooms_totalHouseholds"] 
    nroomsDT <- nroomsDT[`Number of rooms` %like% "One" |
                           `Number of rooms` %like% "Two" |
                           `Number of rooms` %like% "Three" |
                           `Number of rooms` %like% "Four",
                           censusConstraint := "nRooms1_4"] 
    nroomsDT <- nroomsDT[`Number of rooms` %like% "Five" | 
                           `Number of rooms` %like% "Six" ,
                           censusConstraint := "nRooms5_6"] 
    table(nroomsDT$`Number of rooms`, nroomsDT$censusConstraint)
    
    # convert to wide
    nroomsDT <- nroomsDT[, count := Value]
    
    nrooms2013WDT <- reshape(nroomsDT[Year == 2013, # helpful consistency of var names across NZ stats tables
                                  .(AU2013_code,AU2013_label,censusConstraint,count)], 
                              idvar = c("AU2013_code", "AU2013_label"),
                              timevar = "censusConstraint",
                              direction = "wide")
    
    setnames(nrooms2013WDT, c("count.nRooms1_4", "count.nRooms5_6", "count.nRooms7m",
                              "count.nrooms_totalHouseholds", "count.nrooms_statedtotalHouseholds"), 
             c("nRooms1_4", "nRooms5_6", "nRooms7m",
               "nrooms_totalHouseholds", "nrooms_statedtotalHouseholds"))
    
    # combine them ----
    setkey(auListDT, AU2013_code, AU2013_label)
    setkey(fuel2013WDT, AU2013_code, AU2013_label)
    setkey(kids2013WDT, AU2013_code, AU2013_label)
    setkey(npeople2013WDT, AU2013_code, AU2013_label)
    setkey(nrooms2013WDT, AU2013_code, AU2013_label)
    
    au2013DT <- fuel2013WDT[auListDT]
    au2013DT <- kids2013WDT[au2013DT]
    au2013DT <- npeople2013WDT[au2013DT]
    au2013DT <- nrooms2013WDT[au2013DT]
    
    # recalulate n households without children
    au2013DT <- au2013DT[, nKids_0 :=  nrooms_totalHouseholds- nKids_1m]
    

    Figure @ref(fig:checkTotals) shows the match between the total household/family counts derived from each NZ stats table. As we would expect there are very minor differences with the exception of the totals derived from the families table (presence of children).

    pairsDT <- au2013DT[, .(fuel_totalHouseholds, nrooms_statedtotalHouseholds, nrooms_totalHouseholds,
                            npeople_totalHouseholds,fuel_totalStatedHouseholds, nkids_totalFamilies)]
    
    pairs(pairsDT)

    We focus on households/families/dwellings not individuals as the spatial microsimulation will operate at the household level.

    GREENGrid Survey data

    This comprises the household attributes (survey) data:

    f <- paste0(sParams$ggPath, "/safe/survey/ggHouseholdAttributesSafe_2019-04-09.csv") # latest - includes heat source & 
    outputMessage(paste0("Loading: ", f))
    ggHhDT <- data.table::as.data.table(readr::read_csv(f))
    outputMessage(paste0("N rows loaded: ", dkUtils::tidyNum(nrow(ggHhDT))))
    outputMessage(paste0("N households loaded: ", dkUtils::tidyNum(uniqueN(ggHhDT$linkID))))
    

    We need household & Census files with just the variables (constraints) we're going to use. This will take a bit of creative re-coding.

    Families: Presence of children

    Survey:

    • nChildren0_12: 0,1,2+
    • nTeenagers13_18: 0,1,2+

    Census:

    • 0,1+ (simplest)
    # survey: number of kids: categorised as: 1,2,3,4+ ----
    ggHhDT <- ggHhDT[, nKids :=  nChildren0_12 + nTeenagers13_18]
    table(ggHhDT$nKids, useNA = "always")
    message("There are NA - we will need to remove them later")
    ggHhDT <- ggHhDT[, nKids_0 := ifelse(nKids == 0, 1, 0)]
    ggHhDT <- ggHhDT[, nKids_1m := ifelse(nKids > 0 , 1, 0)]
    
    message("Household data: n kids")
    table(ggHhDT$nKids_0)
    table(ggHhDT$nKids_1m)
    
    # create category
    ggHhDT <- ggHhDT[, presenceKids := ifelse(nKids == 0, "None", "1+")]
    
    table(ggHhDT$nKids, ggHhDT$presenceKids, useNA = "always")
    

    Households: Number of people

    # survey: number of people: categorised as: 1,2,3,4+ ----
    ggHhDT <- ggHhDT[, nPeople := nAdults + nChildren0_12 + nTeenagers13_18]
    table(ggHhDT$nPeople, useNA = "always")
    message("There are NA - we will need to remove them later")
    ggHhDT <- ggHhDT[, nPeople_1 := ifelse(nPeople == 1, 1, 0)]
    ggHhDT <- ggHhDT[, nPeople_2 := ifelse(nPeople == 2, 1, 0)]
    ggHhDT <- ggHhDT[, nPeople_3 := ifelse(nPeople == 3, 1, 0)]
    ggHhDT <- ggHhDT[, nPeople_4m := ifelse(nPeople > 3, 1, 0)]
    
    message("Household data: n people")
    table(ggHhDT$nPeople_1)
    table(ggHhDT$nPeople_2)
    table(ggHhDT$nPeople_3)
    table(ggHhDT$nPeople_4m)
    
    # create category
    ggHhDT <- ggHhDT[, nPeopleCat := ifelse(nPeople_1 == 1, "1", NA)]
    ggHhDT <- ggHhDT[, nPeopleCat := ifelse(nPeople_2 == 1, "2", nPeopleCat)]
    ggHhDT <- ggHhDT[, nPeopleCat := ifelse(nPeople_3 == 1, "3", nPeopleCat)]
    ggHhDT <- ggHhDT[, nPeopleCat := ifelse(nPeople_4m == 1, "4+", nPeopleCat)]
    
    table(ggHhDT$nPeopleCat, ggHhDT$nPeople, useNA = "always")
    

    Dwellings: Number of rooms

    We have to do some imputation here

    # survey: n bedrooms via Q10 ----
    ggHhDT <- ggHhDT[, nRooms := `Q10#1_1_1_TEXT` + # bedrooms
                     `Q10#1_2_1_TEXT` + # living rooms
                     `Q10#1_3_1_TEXT` + # dining rooms-kitchens
                     `Q10#1_4_1_TEXT` + # kitchens
                     `Q10#1_5_1_TEXT`  # studies
                     # Q10#1_6_1_TEXT + # bathrooms - excluded from census http://archive.stats.govt.nz/Census/2013-census/info-about-2013-census-data/2013-census-definitions-forms/definitions/dwelling.aspx
                     # Q10#1_7_1_TEXT + # toilets - excluded from census
                     # Q10#1_8_1_TEXT + # laundries - excluded from census
                     ]
    table(ggHhDT$nRooms, useNA = "always")
    message("We're quite short of responses to nRooms - we will try to impute them from the number of adults and children using a regression model...")
    message("Test the model")
    r <- lm(ggHhDT$nRooms ~ ggHhDT$nAdults + ggHhDT$nKids)
    
    summary(r)
    
    # > impute where missing ----
    message("that looks reasonable...impute where missing")
    ggHhDT <- ggHhDT[, nRoomsImputed := predict(r, ggHhDT)] # this forces predict to update the NAs with an estimate
    
    # check what we got
    ggplot2::ggplot(ggHhDT, aes(x = nPeople, y = nRooms)) + 
      geom_point() # original shape
    
    ggplot2::ggplot(ggHhDT, aes(x = nPeople, y = nRoomsImputed)) + 
      geom_point()  + # imputed shape
      labs(caption = "Well, it is a linear model...")
    
    message("So we use the imputed values only where we had NA")
    ggHhDT <- ggHhDT[, nRoomsCorrected := ifelse(is.na(nRooms), 
                                                    round(nRoomsImputed),
                                                    nRooms
    )
    ]
    
    message("How bad does that look?")
    ggHhDT <- ggHhDT[, roomsImputed := ifelse(is.na(nRooms), "Imputed",
                                                 "observed")]
    ggplot2::ggplot(ggHhDT, aes(x = nRoomsImputed, y = nRoomsCorrected, colour = roomsImputed)) + 
      geom_jitter() # 
    
    message("Well, we can live with that for now...")
    
    #
    message("There are NA - we will need to remove them later")
    ggHhDT <- ggHhDT[, nRooms1_4 := ifelse(nRoomsCorrected < 5, 1, 0)]
    ggHhDT <- ggHhDT[, nRooms5_6 := ifelse(nRoomsCorrected > 4 & nRoomsCorrected < 7, 1, 0)]
    ggHhDT <- ggHhDT[, nRooms7m := ifelse(nRoomsCorrected > 6, 1, 0)]
    table(ggHhDT$nRooms1_4)
    table(ggHhDT$nRooms5_6)
    table(ggHhDT$nRooms7m)
    
    # create category
    ggHhDT <- ggHhDT[, nRoomsCat := ifelse(nRoomsCorrected < 5, "1-4", NA)]
    ggHhDT <- ggHhDT[, nRoomsCat := ifelse(nRoomsCorrected > 4 & nRoomsCorrected < 7, "5-6", nRoomsCat)]
    ggHhDT <- ggHhDT[, nRoomsCat := ifelse(nRoomsCorrected > 6, "7+", nRoomsCat)]
    
    message("Check coding")
    table(ggHhDT$nRoomsCat, ggHhDT$nRoomsCorrected, useNA = "always")
    

    Dwellings: Number of bedrooms

    Probably collinear with n people/n rooms. Also have to impute.

    
    # survey: Q10#1_1_1_TEXT ----
    
    ggHhDT <- ggHhDT[, nBedrooms := `Q10#1_1_1_TEXT`]
    table(ggHhDT$nBedrooms,  useNA = "always")
    # best to do 1-2, 3, 4m
    
    message("We're quite short of responses to nBedrooms - we will try to impute them from the number of people using a regression model...")
    message("Test the model")
    r <- lm(ggHhDT$nBedrooms ~ ggHhDT$nAdults + ggHhDT$nKids)
    
    summary(r)
    
    # > impute where missing ----
    message("that looks reasonable...impute where missing")
    ggHhDT <- ggHhDT[, nBedroomsImputed := predict(r, ggHhDT)] # this forces predict to update the NAs with an estimate
    
    # check what we got
    ggplot2::ggplot(ggHhDT, aes(x = nPeople, y = nBedrooms)) + 
      geom_point() # original shape
    
    ggplot2::ggplot(ggHhDT, aes(x = nPeople, y = nBedroomsImputed)) + 
      geom_point()  + # imputed shape
      labs(caption = "Well, it is a linear model...")
    
    message("So we use the imputed values only where we had NA and we round them to whole numbers")
    ggHhDT <- ggHhDT[, nBedroomsCorrected := ifelse(is.na(nBedrooms), 
                                                    round(nBedroomsImputed),
                                                    nBedrooms
    )
    ]
    
    message("How bad does that look?")
    ggHhDT <- ggHhDT[, bedroomsImputed := ifelse(is.na(nBedrooms), "Imputed",
                                                 "observed")]
    ggplot2::ggplot(ggHhDT, aes(x = nBedroomsImputed, y = nBedroomsCorrected, colour = bedroomsImputed)) + 
      geom_jitter() # 
    
    message("Well, we can live with that for now...")
    
    ggHhDT <- ggHhDT[, nBedrooms_1_2 := ifelse(nBedroomsCorrected < 3, 1, 0)]
    ggHhDT <- ggHhDT[, nBedrooms_3 := ifelse(nBedroomsCorrected == 3, 1, 0)]
    ggHhDT <- ggHhDT[, nBedrooms_4m := ifelse(nBedroomsCorrected > 3, 1, 0)]
    message("Household data: n bedrooms")
    table(ggHhDT$nBedrooms_1_2)
    table(ggHhDT$nBedrooms_3)
    table(ggHhDT$nBedrooms_4m)
    
    # create category
    ggHhDT <- ggHhDT[, nBedroomsCat := ifelse(nBedrooms_1_2 == 1, "1-4", NA)]
    ggHhDT <- ggHhDT[, nBedroomsCat := ifelse(nBedrooms_3 == 1, "5-6", nBedroomsCat)]
    ggHhDT <- ggHhDT[, nBedroomsCat := ifelse(nBedrooms_4m == 1, "7+", nBedroomsCat)]
    
    table(ggHhDT$nBedroomsCat, ggHhDT$nBedroomsCorrected, useNA = "always")
    

    Notice that the total counts for bedrooms are lower because they are counts of dwellings instead of households.

    Dwellings: Main heat source for hot water

    Survey - known Census - unknown

    Dwellings: Main fuel used for heat

    
    # survey: Q20 ----
    
    table(ggHhDT$Q20_coded,  useNA = "always")
    
    ggHhDT <- ggHhDT[, heatFuel := dplyr::recode(Q20_coded,
                                                 "Enclosed wood burner" = "Wood",
                                                 "Open fire" = "Wood", # could be coal
                                                 "Heat pump"= "Electricity",
                                                 "HRV or other ventilation system" = "Electricity",
                                                 "Portable electric heaters" = "Electricity",
                                                 "Portable gas heater" = "Gas", 
                                                 "Underfloor gas heating" = "Gas",
                                                 "Other" = "Other")]
    
    table(ggHhDT$Q20_coded,  ggHhDT$heatFuel, useNA = "always")
    ggHhDT <- ggHhDT[, heatSourceWood := ifelse(heatFuel == "Wood", 1,0)]
    ggHhDT <- ggHhDT[, heatSourceElectricity := ifelse(heatFuel == "Electricity", 1,0)]
    ggHhDT <- ggHhDT[, heatSourceGas := ifelse(heatFuel == "Gas", 1,0)]
    ggHhDT <- ggHhDT[, heatSourceCoal := ifelse(heatFuel == "Coal", 1,0)]
    ggHhDT <- ggHhDT[, heatSourceOther := ifelse(heatFuel == "Other", 1,0)]
    
    table(ggHhDT$heatFuel,  ggHhDT$Location, useNA = "always")

    Dwelling: Dwelling type

    Survey - not known Census - known

    Fix final survey data

    Filter survey data to include just the variables we want and fix any NAs.

    surveyDT <- ggHhDT[, .(linkID, 
                           nKids_0, nKids_1m,
                           nPeople_1, nPeople_2, nPeople_3, nPeople_4m,
                           nRooms1_4, nRooms5_6, nRooms7m,
                           heatSourceWood, heatSourceElectricity, heatSourceGas, heatSourceCoal, heatSourceOther,
                           nBedrooms_1_2, nBedrooms_3, nBedrooms_4m)]
    
    skimr::skim(surveyDT)
    
    message("There are still a few NAs so we need to remove these households")
    surveyDT <- na.omit(surveyDT)
    
    skimr::skim(surveyDT)
    message("Fixed it?")

    Select areas of interest

    Now select just the census data for:

    • Hawke's Bay Region
    • Taranaki Region
    censusAuWideDT <- au2013DT[REGC2013_label == "Hawke's Bay Region" |
                             REGC2013_label == "Taranaki Region"] 
    
    message("Total households (rooms table) = ", sum(censusAuWideDT$nrooms_totalHouseholds))
    
    message("Total households (fuel table) = ", sum(censusAuWideDT$fuel_totalHouseholds))
    
    message("Total households (npeople table) = ", sum(censusAuWideDT$npeople_totalHouseholds))
    
    message("Total families (kids table) = ", sum(censusAuWideDT$nkids_totalFamilies))
    
    t <- censusAuWideDT[, .(nHouseholdsRooms = sum(nrooms_totalHouseholds),
                            nHouseholdsFuel = sum(fuel_totalHouseholds),
                            nHouseholdsPeople = sum(npeople_totalHouseholds)), keyby = REGC2013_label]
    
    kableExtra::kable(t, caption = "Household counts by region (by table source)")