diff --git a/analysis/GREENGridModel/_createSyntheticCensus.Rmd b/analysis/GREENGridModel/_createSyntheticCensus.Rmd index 1be771e45122c51fd74aed5699a2d14b1166e359..847c8ef155a595b229f159855bf9b47c15e1b35f 100644 --- a/analysis/GREENGridModel/_createSyntheticCensus.Rmd +++ b/analysis/GREENGridModel/_createSyntheticCensus.Rmd @@ -51,28 +51,28 @@ Check census data distributions - we're looking for zeros. # do this here so we only have the relevant regions # > heat source ---- -t <- summary(censusAuWideDT[, .SD, .SDcols = names(censusAuWideDT) %like% "heat"]) # https://stackoverflow.com/questions/30189979/select-columns-of-data-table-based-on-regex +t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "heat"]) # https://stackoverflow.com/questions/30189979/select-columns-of-data-table-based-on-regex kableExtra::kable(t, caption = "Test distribution of fuel sources") %>% kable_styling() # > n kids ---- -t <- summary(censusAuWideDT[, .SD, .SDcols = names(censusAuWideDT) %like% "nKids"]) +t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "nKids"]) kableExtra::kable(t, caption = "Test distribution of nKids") %>% kable_styling() # > n people ---- -t <- summary(censusAuWideDT[, .SD, .SDcols = names(censusAuWideDT) %like% "nPeople"]) +t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "nPeople"]) kableExtra::kable(t, caption = "Test distribution of nPeople") %>% kable_styling() # > n rooms ---- -t <- summary(censusAuWideDT[, .SD, .SDcols = names(censusAuWideDT) %like% "nRooms"]) +t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "nRooms"]) kableExtra::kable(t, caption = "Test distribution of nRooms") %>% kable_styling() @@ -82,18 +82,18 @@ kableExtra::kable(t, caption = "Test distribution of nRooms") %>% ```{r censusSetup} # check totals are not 0 -#censusAuWideDT <- censusAuWideDT[, nBedrooms_Total := nBedrooms_1_2 + nBedrooms_3 + nBedrooms_4m] -censusAuWideDT <- censusAuWideDT[, nPeople_Total := nPeople_1 + nPeople_2 + nPeople_3 + nPeople_4m] -censusAuWideDT <- censusAuWideDT[, nRooms_Total := nRooms1_4 + nRooms5_6 + nRooms7m] -censusAuWideDT <- censusAuWideDT[, nKids_Total := nKids_0 + nKids_1m] -censusAuWideDT <- censusAuWideDT[, heatSource_Total := heatSourceWood + heatSourceElectricity + heatSourceGas + heatSourceCoal + heatSourceOther] +#au2013DT <- au2013DT[, nBedrooms_Total := nBedrooms_1_2 + nBedrooms_3 + nBedrooms_4m] +au2013DT <- au2013DT[, nPeople_Total := nPeople_1 + nPeople_2 + nPeople_3 + nPeople_4m] +au2013DT <- au2013DT[, nRooms_Total := nRooms1_4 + nRooms5_6 + nRooms7m] +au2013DT <- au2013DT[, nKids_Total := nKids_0 + nKids_1m] +au2013DT <- au2013DT[, heatSource_Total := heatSourceWood + heatSourceElectricity + heatSourceGas + heatSourceCoal + heatSourceOther] -t <- summary(censusAuWideDT[, .SD, .SDcols = names(censusAuWideDT) %like% "_Total"]) +t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "_Total"]) t message("Removing areas which have total counts for any constraint = 0") -nOrig <- nrow(censusAuWideDT) -zerosDT <- censusAuWideDT[nPeople_Total == 0 | is.na(nPeople_Total)| +nOrig <- nrow(au2013DT) +zerosDT <- au2013DT[nPeople_Total == 0 | is.na(nPeople_Total)| #nBedrooms_Total == 0 | is.na(nBedrooms_Total)| nRooms_Total == 0 | is.na(nRooms_Total)| nKids_Total == 0 | is.na(nKids_Total)| @@ -107,7 +107,7 @@ setkey(zerosDT, AU2013_code) zerosDT <- auListDT[zerosDT] zerosDT <- zerosDT[, drop := 1] -test <- zerosDT[censusAuWideDT] +test <- zerosDT[au2013DT] # remove areas where count is 0 or NA censusDT <- test[is.na(drop)] @@ -236,9 +236,12 @@ It is possible that some of the weights are 0. This means there are households w message("N rows before removing zero weights: ", nrow(longFormDT)) longFormDT <- longFormDT[ipfWeight > 0] message("N rows after removing zero weights: ", nrow(longFormDT)) - ``` +```{r saveData} +# save the results for future use since they won't change unless we change the constraints +data.table::fwrite(longFormDT, paste0(sParams$ggPath, "/safe/ipf/nonZeroWeightsAu2013.csv")) +``` We now need to add the survey-based attributes back (from the GREENGrid survey). If we had a much larger sample of households and a lot more areas we would not do this here as it would create a very large file. @@ -504,7 +507,7 @@ setnames(wdt, c("N.Electricity", "N.Gas", "N.Other", "N.Wood"), setkey(wdt, AU2013_code) -tmpDT <- wdt[censusAuWideDT] # merge to census data +tmpDT <- wdt[au2013DT] # merge to census data tmpDT <- auListDT[tmpDT] # add region labels plotDT <- tmpDT[, .(AU2013_code, REGC2013_label, AU2013_label,