diff --git a/MTUS-W6-adult-survey-data-processing.Rmd b/MTUS-W6-adult-survey-data-processing.Rmd
index ff5b3b107bb1880afb32a3ce52d80ed7f67d6402..3aef45bc2abfd7633108f586f419d3e4cf582291 100644
--- a/MTUS-W6-adult-survey-data-processing.Rmd
+++ b/MTUS-W6-adult-survey-data-processing.Rmd
@@ -105,20 +105,27 @@ Things that are NOT fixed here:
 Loading `r sfile`.
 
 ```{r loadSurveyData}
-MTUSW6UKsurvey_DT <- as.data.table(read.spss(sfile))[countrya == "United Kingdom"] # load UK only from SPSS
+MTUSW6survey_DT <- as.data.table(read.spss(sfile)) # load the whole SPSS file (no country filter yet)
 
-setkey(MTUSW6UKsurvey_DT, countrya, survey, swave, msamp, hldid, persid, id)
+setkey(MTUSW6survey_DT, countrya, survey, swave, msamp, hldid, persid, id) # sort and key by the identifier columns
 ```
 
-We have loaded `r length(MTUSW6UKsurvey_DT$id)` rows of data distributed as follows.
+We have loaded `r format(nrow(MTUSW6survey_DT), big.mark = ",", scientific = FALSE)` rows of data for `r format(uniqueN(MTUSW6survey_DT$countrya), big.mark = ",", scientific = FALSE)` countries.
 
 ```{r basicStats}
-kable(caption = "Number of cases per year",
-      table(MTUSW6UKsurvey_DT$survey, MTUSW6UKsurvey_DT$year, useNA = "always")
+kable(caption = "Number of diaries per country and survey",
+      table(droplevels(MTUSW6survey_DT$countrya), # removes unused countries
+            MTUSW6survey_DT$survey)
       )
 ```
 
-# Process survey data
+```{r keepUKOnly}
+MTUSW6UKsurvey_DT <- MTUSW6survey_DT[countrya == "United Kingdom"] # data.table filter on the leading key column
+```
+
+We now keep only the UK data, leaving us with `r format(nrow(MTUSW6UKsurvey_DT), big.mark = ",", scientific = FALSE)` rows of survey data.
+
+# Process UK survey data
 
 ```{r processSurveyData}
 print("-> Create uniq id for diaries (for matching) and persons")