From 452245e80a54fc3974e0e5dba1aae4f8035b8dfa Mon Sep 17 00:00:00 2001 From: Ben Anderson <dataknut@icloud.com> Date: Tue, 27 Jun 2017 18:11:27 +0100 Subject: [PATCH] tidied numbers; latest run --- MTUS-W6-adult-survey-data-processing.Rmd | 2 +- MTUS-W6-adult-survey-data-processing.html | 8 +++++--- MTUS-W6-adult-survey-data-processing.md | 7 +++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/MTUS-W6-adult-survey-data-processing.Rmd b/MTUS-W6-adult-survey-data-processing.Rmd index 4b4ae19..e077258 100644 --- a/MTUS-W6-adult-survey-data-processing.Rmd +++ b/MTUS-W6-adult-survey-data-processing.Rmd @@ -289,7 +289,7 @@ print("-> Creating new ba_survey variable to pool 1983/7") # Save out processed file -This includes duplicate records where the respondent completed more than one diary-day. As an indicator, there are `r uniqueN(MTUSW6UKsurvey_DT$ba_pid)` unique respondents but `r uniqueN(MTUSW6UKsurvey_DT$ba_diarypid)` records. Most importantly it means that `propwt` is not necessarily constant within ba_pid as it is a per-diary-day individual level weight. +This includes duplicate records where the respondent completed more than one diary-day. As an indicator, there are `r ba_tidyNum(uniqueN(MTUSW6UKsurvey_DT$ba_pid))` unique respondents but `r ba_tidyNum(uniqueN(MTUSW6UKsurvey_DT$ba_diarypid))` records. Most importantly it means that `propwt` is not necessarily constant within ba_pid as it is a per-diary-day individual level weight. ```{r saveSurveyFile} # Keep the survey vars we need ---- diff --git a/MTUS-W6-adult-survey-data-processing.html b/MTUS-W6-adult-survey-data-processing.html index 41fbe5f..f875f53 100644 --- a/MTUS-W6-adult-survey-data-processing.html +++ b/MTUS-W6-adult-survey-data-processing.html @@ -118,7 +118,7 @@ $(document).ready(function () { <h1 class="title toc-ignore">MTUS World 6 Survey Data Processing</h1> <h4 class="author"><em>Ben Anderson (<a href="mailto:b.anderson@soton.ac.uk">b.anderson@soton.ac.uk</a>, <a href="mailto:/@dataknut">/@dataknut</a>)</em></h4> -<h4 class="date"><em>Last run at: 2017-05-23 12:58:25</em></h4> +<h4 class="date"><em>Last run at: 2017-06-27 18:11:19</em></h4> </div> @@ -523,6 +523,7 @@ $(document).ready(function () { <pre class="r"><code>print("-> Create uniq id for diaries (for matching) and persons")</code></pre> <pre><code>## [1] "-> Create uniq id for diaries (for matching) and persons"</code></pre> <pre class="r"><code># Create unique ids ---- +# re-use the same function as for the episode data as, strangely, the survey data has persid & id MTUSW6UKsurvey_DT <- ba_MTUScreateIds(MTUSW6UKsurvey_DT) t <- MTUSW6UKsurvey_DT[, .("Number of rows" = .N, @@ -911,6 +912,7 @@ kable(caption = "Days data collected (day may be incorrect)", </div> <div id="save-out-processed-file" class="section level1"> <h1><span class="header-section-number">6</span> Save out processed file</h1> +<p>This includes duplicate records where the respondent completed more than one diary-day. As an indicator, there are 21,038 unique respondents but 58,945 records. Most importantly it means that <code>propwt</code> is not necessarily constant within ba_pid as it is a per-diary-day individual level weight.</p> <pre class="r"><code># Keep the survey vars we need ---- print("-> Keeping core survey variables")</code></pre> <pre><code>## [1] "-> Keeping core survey variables"</code></pre> @@ -1579,7 +1581,7 @@ print("-> Keeping core survey variables")</code></pre> </tbody> </table> <p>As we can see 1974-1987 were full week diaries. 2001 was a two day diary and 1995/2005 were one-day dairies.</p> -<p>From this point on in this section we use only unique individual records. Note that results do not necessarily match the number of cases recorded in the <a href="http://www.timeuse.org/MTUS-User-Guide.html">MTUS user guide</a> as the user guide includes all cases (i.e. both adults and children).</p> +<p>From this point on in this section we use only unique individual records to avoid duplicates where more than 1 diary day was completed. Note that results do not necessarily match the number of cases recorded in the <a href="http://www.timeuse.org/MTUS-User-Guide.html">MTUS user guide</a> as the user guide includes all cases (i.e. both adults and children).</p> <pre class="r"><code>setkey(MTUSW6UKsurveyCore_DT, ba_pid) gMTUSW6UKsurveyCoreUniq_DT <- unique(MTUSW6UKsurveyCore_DT)</code></pre> </div> @@ -2784,7 +2786,7 @@ pub_etc <- lm(main39 ~ survey + mtus_month + ba_age_r + ba_nchild + hhtype, d </div> <div id="about" class="section level1"> <h1><span class="header-section-number">9</span> About</h1> -<p>Analysis completed in: 41.108 seconds using <a href="https://cran.r-project.org/package=knitr">knitr</a> in <a href="http://www.rstudio.com">RStudio</a>.</p> +<p>Analysis completed in: 34.391 seconds using <a href="https://cran.r-project.org/package=knitr">knitr</a> in <a href="http://www.rstudio.com">RStudio</a>.</p> </div> <div id="references" class="section level1 unnumbered"> <h1>References</h1> diff --git a/MTUS-W6-adult-survey-data-processing.md b/MTUS-W6-adult-survey-data-processing.md index ca8983d..d1f442b 100644 --- a/MTUS-W6-adult-survey-data-processing.md +++ b/MTUS-W6-adult-survey-data-processing.md @@ -125,6 +125,7 @@ print("-> Create uniq id for diaries (for matching) and persons") ```r # Create unique ids ---- +# re-use the same function as for the episode data as, strangely, the survey data has persid & id MTUSW6UKsurvey_DT <- ba_MTUScreateIds(MTUSW6UKsurvey_DT) t <- MTUSW6UKsurvey_DT[, .("Number of rows" = .N, @@ -411,6 +412,8 @@ print("-> Creating new ba_survey variable to pool 1983/7") # Save out processed file +This includes duplicate records where the respondent completed more than one diary-day. As an indicator, there are 21,038 unique respondents but 58,945 records. Most importantly it means that `propwt` is not necessarily constant within ba_pid as it is a per-diary-day individual level weight. + ```r # Keep the survey vars we need ---- @@ -592,7 +595,7 @@ NA 0 0 0 0 0 0 0 As we can see 1974-1987 were full week diaries. 2001 was a two day diary and 1995/2005 were one-day dairies. -From this point on in this section we use only unique individual records. Note that results do not necessarily match the number of cases recorded in the [MTUS user guide](http://www.timeuse.org/MTUS-User-Guide.html) as the user guide includes all cases (i.e. both adults and children). +From this point on in this section we use only unique individual records to avoid duplicates where more than 1 diary day was completed. Note that results do not necessarily match the number of cases recorded in the [MTUS user guide](http://www.timeuse.org/MTUS-User-Guide.html) as the user guide includes all cases (i.e. both adults and children). ```r @@ -1420,6 +1423,6 @@ On the basis of these results we seem justified in assuming that we can pool 198 # About -Analysis completed in: 41.108 seconds using [knitr](https://cran.r-project.org/package=knitr) in [RStudio](http://www.rstudio.com). +Analysis completed in: 34.391 seconds using [knitr](https://cran.r-project.org/package=knitr) in [RStudio](http://www.rstudio.com). # References -- GitLab