From 77a237439ffeabc83b9cbd18f8b42b334277db66 Mon Sep 17 00:00:00 2001
From: Ben Anderson <dataknut@icloud.com>
Date: Mon, 6 Dec 2021 22:32:37 +0000
Subject: [PATCH] postcode wrangling

---
 R/postcodeWrangling.R | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 R/postcodeWrangling.R

diff --git a/R/postcodeWrangling.R b/R/postcodeWrangling.R
new file mode 100644
index 0000000..b14ad12
--- /dev/null
+++ b/R/postcodeWrangling.R
@@ -0,0 +1,34 @@
+# postcodeWrangling.R: doing stuff with postcodes
+library(data.table) # fast
+
+dp <- path.expand("~/Dropbox/data/UK_postcodes/") # root folder for the postcode data; keep the trailing "/" as paths are built with paste0()
+
+# load the GOR region code -> name lookup ----
+df <- "NSPL_AUG_2020_UK/Documents/Region names and codes EN as at 12_10 (GOR).xlsx" # workbook path relative to dp
+region_codes <- readxl::read_xlsx(path = paste0(dp, df))
+region_codes_dt <- data.table::as.data.table(region_codes)
+data.table::setkeyv(region_codes_dt, cols = "GOR10CD") # key on the region code so the lookup can be joined by key
+
+# derive postcode districts & add GOR region names ----
+
+# > 2016 ----
+df <- paste0(dp, "NSPL_AUG_2016_UK_V2/Data/NSPL_AUG_2016_UK.csv.gz") # built from dp so the data location is configured in one place only
+rawDT <- data.table::fread(df) # one row per postcode
+rawDT[, pcd_district := data.table::tstrsplit(pcds, " ", fixed = TRUE, keep = 1L)] # the characters before the space are the postcode district
+# sanity check: should be 3088 (UK) + 26 = 3114 (UK + IoM & Channel Islands)
+data.table::uniqueN(rawDT$pcd_district)
+rawDT[, GOR10CD := gor] # copy the region code under the name used as the key of region_codes_dt
+data.table::setkey(rawDT, GOR10CD)
+rawDT <- region_codes_dt[rawDT] # keyed join: keeps every postcode row & adds the matching region columns (NA where no match)
+rawDT[osgrdind < 9, .(n = .N), keyby = .(gor, GOR10NM)] # postcode counts per region, rows with osgrdind < 9 only
+# inspect: there are some rows without a GOR10NM (or gor)
+head(rawDT[is.na(GOR10NM)])
+pcd_districts_2016_dt <- rawDT[is.na(doterm) & osgrdind < 9, # keep live postcodes (doterm, the termination date, is NA when live) with a grid reference; `doterm < 201608` kept ONLY terminated ones
+                               .(nPostcodes = .N), keyby = .(pcd_district, GOR10CD, GOR10NM)]
+data.table::uniqueN(pcd_districts_2016_dt$pcd_district) # number of districts left after filtering
+skimr::skim(pcd_districts_2016_dt) # quick summary of the district-level table
+# check: all postcode districts should now match to a GOR (useNA exposes any that do not)
+table(pcd_districts_2016_dt$GOR10CD, pcd_districts_2016_dt$GOR10NM, useNA = "always")
+
+# write the district-level counts into the postcode data folder for future use
+data.table::fwrite(x = pcd_districts_2016_dt, file = paste0(dp, "postcode_districts_2016.csv"))
-- 
GitLab