From 8b8e8a9dbf9461dba512eaaee733e9b28248b6ec Mon Sep 17 00:00:00 2001
From: Ben Anderson <dataknut@icloud.com>
Date: Sat, 2 Jul 2016 17:49:27 +0100
Subject: [PATCH] amended summary stats to include all data before keeping only
 UK

---
 MTUS-W6-adult-episodes-data-processing.Rmd | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/MTUS-W6-adult-episodes-data-processing.Rmd b/MTUS-W6-adult-episodes-data-processing.Rmd
index 824a81b..33452c7 100644
--- a/MTUS-W6-adult-episodes-data-processing.Rmd
+++ b/MTUS-W6-adult-episodes-data-processing.Rmd
@@ -101,11 +101,24 @@ Loading and processing `r efile`.
 system.time(
   mtusEpsDT <- as.data.table(read.spss(efile))
 )
+```
+
+We have loaded `r format(nrow(mtusEpsDT), big.mark=",",scientific=FALSE)` rows of data for `r format(uniqueN(mtusEpsDT$countrya), big.mark=",",scientific=FALSE)` countries.
+
+```{r basicStats}
+kable(caption = "Number of episodes per country and survey year", # table() counts rows of the episode data, not diaries
+      table(droplevels(mtusEpsDT$countrya), # rows: countries, with unused factor levels removed
+            mtusEpsDT$survey) # columns: one per distinct value of survey
+      )
+```
+
+```{r keepUKOnly}
+mtusUKEpsDT <- subset(mtusEpsDT, countrya == "United Kingdom") # new table with only the rows where countrya is "United Kingdom"; mtusEpsDT is not modified or removed in this chunk
+```
 
-# keep UK only
-mtusUKEpsDT <- mtusEpsDT[countrya ==  "United Kingdom"] 
-mtusEpsDT <- NULL
+We now keep only the UK data, leaving us with `r format(nrow(mtusUKEpsDT), big.mark=",",scientific=FALSE)` rows of survey data.
 
+```{r setKeys}
 # This works but we'll create a concatenated id to make life easier
 setkey(mtusUKEpsDT, countrya, survey, swave, msamp, hldid, persid, id)
 
-- 
GitLab