Skip to content
Snippets Groups Projects
Commit 9992a12f authored by Ben Anderson's avatar Ben Anderson
Browse files

added code to run IPF model and produce generic weights

parent c90866b1
No related branches found
No related tags found
No related merge requests found
# Runs ipf for survey & census inputs
# There are intricacies so it is not easy to convert to a function
require(data.table)
require(ipfp)
library(spatialec) # for parameters etc
spatialec::setup()
# Expects: ----
# local area labels
areasDT <- data.table::fread(p_Params$areasTable2013)
auListDT <- areasDT[, .(nMBs = .N), keyby = .(AU2013_code, AU2013_label, REGC2013_label)]
auListDT <- auListDT[, AU2013_code := as.character(AU2013_code)] # for easier matching
setkey(auListDT, AU2013_code)
# > an area level constraints dt : censusDT ----
# use default pre-processed
censusf <- paste0(p_Params$dataPath, "processed/2013IpfInput.csv")
censusDT <- data.table::fread(censusf)
censusDT[, AU2013_code := as.character(AU2013_code)] # saves a lot of confusion
# > a survey constraints dt (vars in same order) : surveyDT ----
# use defualt pre-processed
surveyf <- paste0(p_Params$ggPath, "/safe/survey/ggIpfInput.csv")
surveyDT <- data.table::fread(surveyf)
# > a dt of area codes ----
areaCodesDT <- censusDT[, .(AU2013_code)]
# > a dt of household ids ----
hhIdsDT <- surveyDT[, .(linkID)]
# Set the constraints & set order ----
# relies heavily on: http://robinlovelace.net/spatial-microsim-book/smsim-in-R.html
# ipfp wants:
# y = numeric vector of constraints for each area - the names must match the survey, data as counts, no ids or anything
# which means we have to assume ipfp maintains order
# A = the transposed survey data (row = var) in dummy variable form - names must match constraints, no ids etc, variables that match constraints only
# which means we have to assume ipfp maintains order
# x0 = numeric initial vector of length ncol
# set order of variables - has to match (probably)
fullConstraintsList <- c("heatSourceWood", "heatSourceElectricity", "heatSourceGas", "heatSourceCoal", "heatSourceOther",
"nKids_0", "nKids_1m",
#"nBedrooms_1_2", "nBedrooms_3", "nBedrooms_4m",
"nRooms1_4", "nRooms5_6", "nRooms7m",
"nPeople_1", "nPeople_2", "nPeople_3", "nPeople_4m" # fits last
)
# coal = 0 might break the IPF
testConstraintsList <- c(#"heatSourceWood","heatSourceElectricity","heatSourceGas","heatSourceCoal","heatSourceOther",
"nKids_0", "nKids_1m",
#"nBedrooms_1_2", "nBedrooms_3", "nBedrooms_4m",
"nRooms1_4","nRooms5_6","nRooms7m",
"nPeople_1", "nPeople_2", "nPeople_3", "nPeople_4m" # fits last
)
# Set up census constraints ----
# check totals are not 0
#censusDT <- censusDT[, nBedrooms_Total := nBedrooms_1_2 + nBedrooms_3 + nBedrooms_4m] # not yet added
censusDT <- censusDT[, nPeople_Total := nPeople_1 + nPeople_2 + nPeople_3 + nPeople_4m]
censusDT <- censusDT[, nRooms_Total := nRooms1_4 + nRooms5_6 + nRooms7m]
censusDT <- censusDT[, nKids_Total := nKids_0 + nKids_1m]
censusDT <- censusDT[, heatSource_Total := heatSourceWood + heatSourceElectricity + heatSourceGas + heatSourceCoal + heatSourceOther]
t <- summary(au2013DT[, .SD, .SDcols = names(au2013DT) %like% "_Total"])
t
message("Removing areas which have total counts for any constraint = 0")
nOrig <- nrow(censusDT)
zerosDT <- censusDT[nPeople_Total == 0 | is.na(nPeople_Total)|
#nBedrooms_Total == 0 | is.na(nBedrooms_Total)|
nRooms_Total == 0 | is.na(nRooms_Total)|
nKids_Total == 0 | is.na(nKids_Total)|
heatSource_Total == 0 | is.na(heatSource_Total),
.(AU2013_code = as.character(AU2013_code), nRooms_Total,nPeople_Total, nKids_Total,
heatSource_Total
)
]
setkey(zerosDT, AU2013_code)
setkey(areaCodesDT, AU2013_code)
zerosDT <- areaCodesDT[zerosDT]
zerosDT <- zerosDT[, drop := 1]
test <- zerosDT[censusDT]
# Remove areas where count is 0 or NA ----
censusDT <- test[is.na(drop)]
nNonZero <- nrow(censusDT)
finalAreaCodes <- censusDT[, AU2013_code] # need this as a list
message("Removed ", nOrig - nNonZero, " areas where the household totals were 0 or NA.")
# Select just the constraints we want to use ----
print(paste0("Using ", testConstraintsList))
ipfInputSurveyDT <- surveyDT[,testConstraintsList , with=FALSE]
message("Input survey constraints:")
names(ipfInputSurveyDT)
ipfInputConstraintsDT <- censusDT[, testConstraintsList, with=FALSE]
message("Input census constraints:")
names(ipfInputConstraintsDT)
# Run IPF ----
# returns a long form weights dt with oacodes labelled
# must be numeric
sdt <- apply(ipfInputSurveyDT, 2, function(x) as.numeric(x))
surveyInputDT <- data.table::as.data.table(sdt)
# transpose for ipf
surv_dtt <- t(surveyInputDT) # <- needs to be numeric too (not integers) - not sure why
# set up initial vector of the same length as the number of households as a load of 1s (could be the response weights...)
x0 <- rep(1, nrow(surveyInputDT))
# census constraints can't be zero
cdt <- apply(ipfInputConstraintsDT, 2, function(x) ifelse(x == 0, 0.001, x))
censusInputDT <- data.table::as.data.table(cdt)
# create 2D weighst matrix (households by areas) - this will get filled with the weights
weights = array(NA, dim=c(nrow(surveyInputDT), nrow(censusInputDT)))
for(i in 1:ncol(weights)){ # for each area (cols of weights)
print(paste0("Running ipfp on area ", i))
weights[,i] <- ipfp::ipfp(as.numeric(cdt[i,]), # only accepts a vector of numbers
surv_dtt,
x0,
maxit = 10, # maxit = 10 iterations (for now),
verbose = F) # careful with verbose - too much output
}
# Process results ----
# this is where we have to assume ipfp maintained the oaCode order
wDT <- as.data.table(weights) # convert to dt
wDT <- setnames(wDT, as.character(finalAreaCodes)) # add col names
# this is where we have to assume ipfp maintained the bmg_id order
dt <- cbind(hhIdsDT, wDT) # add linkID to each row of weights (fills down, if you get recycling then there is a problem)
setkey(dt, linkID)
# need to reshape the weightsDT into 1 column of weights with oacode set to oacode etc
longFormDT <- as.data.table(melt(dt, id=c("linkID")))
# add correct area code name for future matching
longFormDT <- setnames(longFormDT, c("variable", "value"), c("AU2013_code", "ipfWeight"))
setkey(longFormDT,AU2013_code)
wDT <- NULL # save mem
#> add region ----
longFormDT <- auListDT[longFormDT]
kableExtra::kable(caption = "First few rows/columns of ipf weights table", head(longFormDT))
kableExtra::kable(caption = "Summary of ipf weights table", summary(longFormDT))
message("Dimensions of ipf weights table: ", dkUtils::tidyNum(nrow(longFormDT)), " rows x ", ncol(longFormDT), " cols")
# > remove zero weight cases ----
message("N rows before removing zero weights: ", nrow(longFormDT))
longFormDT <- longFormDT[ipfWeight > 0]
message("N rows after removing zero weights: ", nrow(longFormDT))
# save the results for future use since they won't change unless we change the constraints ----
data.table::fwrite(longFormDT, paste0(p_Params$ggPath, "/safe/ipf/nonZeroWeightsAu2013.csv"))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment