From 77a237439ffeabc83b9cbd18f8b42b334277db66 Mon Sep 17 00:00:00 2001
From: Ben Anderson <dataknut@icloud.com>
Date: Mon, 6 Dec 2021 22:32:37 +0000
Subject: [PATCH] postcode wrangling

---
 R/postcodeWrangling.R | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 R/postcodeWrangling.R

diff --git a/R/postcodeWrangling.R b/R/postcodeWrangling.R
new file mode 100644
index 0000000..b14ad12
--- /dev/null
+++ b/R/postcodeWrangling.R
@@ -0,0 +1,37 @@
+# postcodeWrangling.R: build a postcode district -> GOR region lookup from NSPL data
+library(data.table) # fast
+
+dp <- path.expand("~/Dropbox/data/UK_postcodes/") # where you keep your postcode data
+
+# load GOR region names ----
+df <- "NSPL_AUG_2020_UK/Documents/Region names and codes EN as at 12_10 (GOR).xlsx"
+region_codes <- readxl::read_xlsx(paste0(dp, df))
+region_codes_dt <- data.table::as.data.table(region_codes)
+data.table::setkey(region_codes_dt, GOR10CD) # key used by the join below
+
+# derive postcode districts & add GOR region names ----
+
+# > 2016 ----
+df <- "NSPL_AUG_2016_UK_V2/Data/NSPL_AUG_2016_UK.csv.gz"
+rawDT <- data.table::fread(paste0(dp, df)) # path built from dp, as for the region codes
+# the characters before the space in pcds are the postcode district
+rawDT[, pcd_district := data.table::tstrsplit(pcds, " ", fixed = TRUE, keep = 1L)]
+# should be 3088 (UK) + 26 = 3114 (UK + IoM & Channel Islands)
+data.table::uniqueN(rawDT$pcd_district)
+rawDT[, GOR10CD := gor]
+data.table::setkey(rawDT, GOR10CD)
+rawDT <- region_codes_dt[rawDT] # keeps every rawDT row; GOR10NM is NA where no code matches
+rawDT[osgrdind < 9, .(n = .N), keyby = .(gor, GOR10NM)]
+# there are some rows without a GOR10NM (or gor)
+head(rawDT[is.na(GOR10NM)])
+# keep live postcodes only: per the NSPL user guide doterm (yyyymm) is empty (NA) unless the
+# postcode was terminated, so `doterm < 201608` kept *only* terminated ones; also drop those without grid references
+pcd_districts_2016_dt <- rawDT[is.na(doterm) & osgrdind < 9,
+  .(nPostcodes = .N), keyby = .(pcd_district, GOR10CD, GOR10NM)]
+data.table::uniqueN(pcd_districts_2016_dt$pcd_district)
+skimr::skim(pcd_districts_2016_dt)
+# all postcode districts should now match to a GOR
+table(pcd_districts_2016_dt$GOR10CD, pcd_districts_2016_dt$GOR10NM, useNA = "always")
+
+# save the file for future use
+data.table::fwrite(pcd_districts_2016_dt, file = paste0(dp, "postcode_districts_2016.csv"))
-- 
GitLab