# Source: GitLab commit 77a23743 (parent 56e50b7c), authored by Ben Anderson
# Commit message: postcode wrangling
# postcodeWrangling.R: doing stuff with postcodes
library(data.table) # fast
# Root folder for the postcode data; the trailing slash matters because
# file names are appended to it with paste0()
dp <- path.expand("~/Dropbox/data/UK_postcodes/")

# load GOR region names ----
# Spreadsheet of region codes & names, located relative to dp
df <- "NSPL_AUG_2020_UK/Documents/Region names and codes EN as at 12_10 (GOR).xlsx"
region_codes <- readxl::read_xlsx(path = paste0(dp, df))
# Convert to a data.table keyed on the region code so it can be joined by key later
region_codes_dt <- data.table::as.data.table(region_codes, key = "GOR10CD")
# derive postcode districts & add GOR region names ----
# > 2016 ----
# Build the file path from dp so the data location is only configured in one place
df <- paste0(dp, "NSPL_AUG_2016_UK_V2/Data/NSPL_AUG_2016_UK.csv.gz")
rawDT <- data.table::fread(df)
# the characters before the space are the postcode district;
# fixed = TRUE splits on a literal space rather than treating " " as a regular expression
rawDT[, pcd_district := data.table::tstrsplit(pcds, " ", fixed = TRUE, keep = 1L)]
# should be 3088 (UK) + 26 = 3114 (UK + IoM & Channel Islands)
data.table::uniqueN(rawDT$pcd_district)
# copy the region code into a column named as in the look-up table so both tables share a key
rawDT[, GOR10CD := gor]
data.table::setkey(rawDT, GOR10CD)
# keyed join: every row of rawDT is kept; codes missing from the look-up get NA region columns
rawDT <- region_codes_dt[rawDT] # match on GOR codes & names
# quick check: number of postcodes with a grid reference per region code & name
rawDT[osgrdind < 9, .(n = .N), keyby = .(gor, GOR10NM)]
# there are some rows without a GOR10NM (or gor)
head(rawDT[is.na(GOR10NM)])
# Count postcodes per district & region, removing terminated postcodes & those without grid references.
# NOTE(review): assumes NSPL leaves doterm empty (read as NA) for live postcodes - confirm against the
# NSPL user guide. The previous filter `doterm < 201608` did the opposite of what was intended: it kept
# only postcodes terminated before Aug 2016 and silently dropped live ones, because NA < 201608 is NA
# and data.table treats NA in `i` as FALSE.
pcd_districts_2016_dt <- rawDT[is.na(doterm) & osgrdind < 9,
                               .(nPostcodes = .N),
                               keyby = .(pcd_district, GOR10CD, GOR10NM)]
data.table::uniqueN(pcd_districts_2016_dt$pcd_district)
skimr::skim(pcd_districts_2016_dt)
# all postcode districts should now match to a GOR
table(pcd_districts_2016_dt$GOR10CD, pcd_districts_2016_dt$GOR10NM, useNA = "always")
# save the file for future use
data.table::fwrite(pcd_districts_2016_dt, file = paste0(dp, "postcode_districts_2016.csv"))
# (GitLab page footer text from the web export removed; it was not part of the script)