Commit 77a23743 authored by Ben Anderson's avatar Ben Anderson
Browse files

postcode wrangling

parent 56e50b7c
# postcodeWrangling.R: doing stuff with postcodes
library(data.table) # fast
# Root folder holding the downloaded postcode products.
# Keep the trailing slash: file names below are appended with paste0().
dp <- path.expand("~/Dropbox/data/UK_postcodes/")

# load GOR region names ----
# The lookup workbook lives in the Documents folder of the NSPL August 2020 release.
df <- "NSPL_AUG_2020_UK/Documents/Region names and codes EN as at 12_10 (GOR).xlsx"
region_codes <- readxl::read_xlsx(path = paste0(dp, df))
# Work on a data.table copy keyed by region code so it can later be joined
# to the postcode records on GOR10CD.
region_codes_dt <- data.table::setkeyv(
  data.table::as.data.table(region_codes),
  cols = "GOR10CD"
)
# derive postcode districts & add GOR region names ----
# > 2016 ----
# Build the input path from `dp` (as the other reads/writes in this script do)
# so that changing the data folder in one place is enough.
df <- "NSPL_AUG_2016_UK_V2/Data/NSPL_AUG_2016_UK.csv.gz"
rawDT <- data.table::fread(paste0(dp, df))
# the characters before the space are the postcode district;
# fixed = TRUE splits on the literal space instead of compiling a regex
rawDT[, pcd_district := data.table::tstrsplit(pcds, " ", fixed = TRUE, keep = 1L)]
# should be 3088 (UK) + 26 = 3114 (UK + IoM & Channel Islands)
data.table::uniqueN(rawDT$pcd_district)
# copy the region code into a column named as in the lookup table so both
# tables can be keyed (and therefore joined) on GOR10CD
rawDT[, GOR10CD := gor]
data.table::setkey(rawDT, GOR10CD)
# X[Y] keyed join: every postcode row is kept, region columns are NA where
# no GOR code matches
rawDT <- region_codes_dt[rawDT] # match on GOR codes & names
# quick check: postcodes with a grid reference, counted per region code/name
rawDT[osgrdind < 9, .(n = .N), keyby = .(gor, GOR10NM)]
# there are some rows without a GOR10NM (or gor)
head(rawDT[is.na(GOR10NM)])
# count live, geo-referenced postcodes per postcode district & region ----
# Row filter:
#  * is.na(doterm): doterm is the termination date (yyyymm) and is empty (NA)
#    for live postcodes. The earlier test `doterm < 201608` evaluated to NA for
#    live postcodes - which data.table treats as "drop the row" - so it kept
#    only terminated postcodes, the opposite of what was intended.
#    NOTE(review): assumes fread parsed doterm as a numeric column with NA for
#    blanks - confirm with str(rawDT$doterm).
#  * osgrdind < 9: remove postcodes without grid references.
pcd_districts_2016_dt <- rawDT[
  is.na(doterm) & osgrdind < 9,
  .(nPostcodes = .N),
  keyby = .(pcd_district, GOR10CD, GOR10NM)
]
# number of distinct districts left after filtering
data.table::uniqueN(pcd_districts_2016_dt$pcd_district)
skimr::skim(pcd_districts_2016_dt)
# all postcode districts should now match to a GOR
table(pcd_districts_2016_dt$GOR10CD, pcd_districts_2016_dt$GOR10NM, useNA = "always")
# save the file for future use
data.table::fwrite(pcd_districts_2016_dt, file = paste0(dp, "postcode_districts_2016.csv"))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment