Skip to content
Snippets Groups Projects
Commit 403dca21 authored by Ben Anderson's avatar Ben Anderson
Browse files

amended census build

parent 9e0b3730
No related branches found
No related tags found
No related merge requests found
...@@ -51,28 +51,28 @@ Check census data distributions - we're looking for zeros. ...@@ -51,28 +51,28 @@ Check census data distributions - we're looking for zeros.
# do this here so we only have the relevant regions # do this here so we only have the relevant regions
# > heat source ---- # > heat source ----
t <- summary(censusAuWideDT[, .SD, .SDcols = names(censusAuWideDT) %like% "heat"]) # https://stackoverflow.com/questions/30189979/select-columns-of-data-table-based-on-regex t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "heat"]) # https://stackoverflow.com/questions/30189979/select-columns-of-data-table-based-on-regex
kableExtra::kable(t, caption = "Test distribution of fuel sources") %>% kableExtra::kable(t, caption = "Test distribution of fuel sources") %>%
kable_styling() kable_styling()
# > n kids ---- # > n kids ----
t <- summary(censusAuWideDT[, .SD, .SDcols = names(censusAuWideDT) %like% "nKids"]) t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "nKids"])
kableExtra::kable(t, caption = "Test distribution of nKids") %>% kableExtra::kable(t, caption = "Test distribution of nKids") %>%
kable_styling() kable_styling()
# > n people ---- # > n people ----
t <- summary(censusAuWideDT[, .SD, .SDcols = names(censusAuWideDT) %like% "nPeople"]) t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "nPeople"])
kableExtra::kable(t, caption = "Test distribution of nPeople") %>% kableExtra::kable(t, caption = "Test distribution of nPeople") %>%
kable_styling() kable_styling()
# > n rooms ---- # > n rooms ----
t <- summary(censusAuWideDT[, .SD, .SDcols = names(censusAuWideDT) %like% "nRooms"]) t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "nRooms"])
kableExtra::kable(t, caption = "Test distribution of nRooms") %>% kableExtra::kable(t, caption = "Test distribution of nRooms") %>%
kable_styling() kable_styling()
...@@ -82,18 +82,18 @@ kableExtra::kable(t, caption = "Test distribution of nRooms") %>% ...@@ -82,18 +82,18 @@ kableExtra::kable(t, caption = "Test distribution of nRooms") %>%
```{r censusSetup} ```{r censusSetup}
# check totals are not 0 # check totals are not 0
#censusAuWideDT <- censusAuWideDT[, nBedrooms_Total := nBedrooms_1_2 + nBedrooms_3 + nBedrooms_4m] #au2013DT <- au2013DT[, nBedrooms_Total := nBedrooms_1_2 + nBedrooms_3 + nBedrooms_4m]
censusAuWideDT <- censusAuWideDT[, nPeople_Total := nPeople_1 + nPeople_2 + nPeople_3 + nPeople_4m] au2013DT <- au2013DT[, nPeople_Total := nPeople_1 + nPeople_2 + nPeople_3 + nPeople_4m]
censusAuWideDT <- censusAuWideDT[, nRooms_Total := nRooms1_4 + nRooms5_6 + nRooms7m] au2013DT <- au2013DT[, nRooms_Total := nRooms1_4 + nRooms5_6 + nRooms7m]
censusAuWideDT <- censusAuWideDT[, nKids_Total := nKids_0 + nKids_1m] au2013DT <- au2013DT[, nKids_Total := nKids_0 + nKids_1m]
censusAuWideDT <- censusAuWideDT[, heatSource_Total := heatSourceWood + heatSourceElectricity + heatSourceGas + heatSourceCoal + heatSourceOther] au2013DT <- au2013DT[, heatSource_Total := heatSourceWood + heatSourceElectricity + heatSourceGas + heatSourceCoal + heatSourceOther]
t <- summary(censusAuWideDT[, .SD, .SDcols = names(censusAuWideDT) %like% "_Total"]) t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "_Total"])
t t
message("Removing areas which have total counts for any constraint = 0") message("Removing areas which have total counts for any constraint = 0")
nOrig <- nrow(censusAuWideDT) nOrig <- nrow(au2013DT)
zerosDT <- censusAuWideDT[nPeople_Total == 0 | is.na(nPeople_Total)| zerosDT <- au2013DT[nPeople_Total == 0 | is.na(nPeople_Total)|
#nBedrooms_Total == 0 | is.na(nBedrooms_Total)| #nBedrooms_Total == 0 | is.na(nBedrooms_Total)|
nRooms_Total == 0 | is.na(nRooms_Total)| nRooms_Total == 0 | is.na(nRooms_Total)|
nKids_Total == 0 | is.na(nKids_Total)| nKids_Total == 0 | is.na(nKids_Total)|
...@@ -107,7 +107,7 @@ setkey(zerosDT, AU2013_code) ...@@ -107,7 +107,7 @@ setkey(zerosDT, AU2013_code)
zerosDT <- auListDT[zerosDT] zerosDT <- auListDT[zerosDT]
zerosDT <- zerosDT[, drop := 1] zerosDT <- zerosDT[, drop := 1]
test <- zerosDT[censusAuWideDT] test <- zerosDT[au2013DT]
# remove areas where count is 0 or NA # remove areas where count is 0 or NA
censusDT <- test[is.na(drop)] censusDT <- test[is.na(drop)]
...@@ -236,9 +236,12 @@ It is possible that some of the weights are 0. This means there are households w ...@@ -236,9 +236,12 @@ It is possible that some of the weights are 0. This means there are households w
message("N rows before removing zero weights: ", nrow(longFormDT)) message("N rows before removing zero weights: ", nrow(longFormDT))
longFormDT <- longFormDT[ipfWeight > 0] longFormDT <- longFormDT[ipfWeight > 0]
message("N rows after removing zero weights: ", nrow(longFormDT)) message("N rows after removing zero weights: ", nrow(longFormDT))
``` ```
```{r saveData}
# save the results for future use since they won't change unless we change the constraints
data.table::fwrite(longFormDT, paste0(sParams$ggPath, "/safe/ipf/nonZeroWeightsAu2013.csv"))
```
We now need to add the survey-based attributes back (from the GREENGrid survey). If we had a much larger sample of households and a lot more areas we would not do this here as it would create a very large file. We now need to add the survey-based attributes back (from the GREENGrid survey). If we had a much larger sample of households and a lot more areas we would not do this here as it would create a very large file.
...@@ -504,7 +507,7 @@ setnames(wdt, c("N.Electricity", "N.Gas", "N.Other", "N.Wood"), ...@@ -504,7 +507,7 @@ setnames(wdt, c("N.Electricity", "N.Gas", "N.Other", "N.Wood"),
setkey(wdt, AU2013_code) setkey(wdt, AU2013_code)
tmpDT <- wdt[censusAuWideDT] # merge to census data tmpDT <- wdt[au2013DT] # merge to census data
tmpDT <- auListDT[tmpDT] # add region labels tmpDT <- auListDT[tmpDT] # add region labels
plotDT <- tmpDT[, .(AU2013_code, REGC2013_label, AU2013_label, plotDT <- tmpDT[, .(AU2013_code, REGC2013_label, AU2013_label,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment