moved laundry paper code to DEMAND repo

b4909639 · Ben Anderson · 9b96ec21 · b4909639 · b4909639 · b4909639
Commit b4909639 authored Feb 13, 2018 by Ben Anderson
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v1.0-adult.R
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v1.0-adult.R
+############################################
+# Time Use data analysis for 'Laundry' paper  
+# Use MTUS World 6 time-use data (UK subset) to examine:
+# - distributions of laundry in 1975 & 2005
+# - changing laundry practices
+# Data source: www.timeuse.org/mtus
+# data already in long format (but episodes)
+# This work was funded by RCUK through the End User Energy Demand Centres Programme via the
+# "DEMAND: Dynamics of Energy, Mobility and Demand" Centre (www.demand.ac.uk, gow.epsrc.ac.uk/NGBOViewGrant.aspx?GrantRef=EP/K011723/1)
+#     Copyright (C) 2014  University of Southampton
+#     Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut)
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License 
+# (http://choosealicense.com/licenses/gpl-2.0/), or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# clear out all old objects etc to avoid confusion
+rm(list = ls()) 
+# add libraries
+# NB: library() attaches ONE package per call - its second positional argument
+# is `help`, so library("lattice", "Hmisc") attached lattice and silently
+# ignored Hmisc. Attach each package with its own call.
+library("lattice")
+library("Hmisc")
+# set up some useful vars
+# diary (episode) file, Stata format
+ifile_d <- "~/Documents/Work/Data/Social Science Datatsets/MTUS/World 6/processed/MTUS-adult-episode-UK-only-wf.dta"
+# survey (aggregate) file, Stata format
+ifile_s <- "~/Documents/Work/Data/Social Science Datatsets/MTUS/World 6/processed/MTUS-adult-aggregate-UK-only-wf.dta"
+# folder the charts are written to
+rpath <- "~/Documents/Work/Projects/RCUK-DEMAND/Theme 1/results/MTUS"
+# paste!
+# Loading the data
+# load as stata file
+library(foreign)
+# read the diary episodes, then the survey file (both Stata .dta)
+MTUSW6UK_d <- read.dta(file = ifile_d)
+MTUSW6UK_s <- read.dta(file = ifile_s)
+# Only carry the join key plus the two survey variables that are needed into
+# the merge, so the merged frame stays small enough for memory
+MTUSW6UK_s_redvars <- c("diarypid", "empstat", "urban")
+MTUSW6UK_s_red <- MTUSW6UK_s[, MTUSW6UK_s_redvars]
+# attach the survey variables to every diary episode via diarypid
+MTUSW6UK_m <- merge(x = MTUSW6UK_d, y = MTUSW6UK_s_red, by = "diarypid")
+# eyeball the first rows ...
+head(MTUSW6UK_m)
+# ... and the column names of the merged data
+names(MTUSW6UK_m)
+# check the distribution of episodes by time of day and year of survey
+# NB: stata has already set the date to 1960! We need to re-format to half hours!
+table("Half hour"= MTUSW6UK_m$s_halfhour)
+# MTUS activity 21 = "laundry, ironing, clothing repair"
+# The data came in from Stata with value labels applied, so the activity is
+# matched on its label text rather than on the numeric code
+# how often is laundry recorded as the primary / secondary activity?
+table("Laundry as primary"= MTUSW6UK_m$main == "laundry, ironing, clothing repair")
+table("Laundry as secondary"= MTUSW6UK_m$sec == "laundry, ironing, clothing repair")
+# 0/1 indicator columns. %in% never returns NA, so an episode with a missing
+# activity is coded 0
+MTUSW6UK_m$laundry_p <- as.numeric(MTUSW6UK_m$main %in% "laundry, ironing, clothing repair")
+MTUSW6UK_m$laundry_s <- as.numeric(MTUSW6UK_m$sec %in% "laundry, ironing, clothing repair")
+# laundry in either role
+MTUSW6UK_m$laundry_all <- as.numeric(MTUSW6UK_m$laundry_p == 1 | MTUSW6UK_m$laundry_s == 1)
+table(MTUSW6UK_m$laundry_all)
+# where is laundry done? cross-tabulate each indicator against eloc
+# (location codes as noted by the author: -1 = unknown, 1 = home, 2 = elsewhere)
+table("Laundry as primary"= MTUSW6UK_m$laundry_p == 1, MTUSW6UK_m$eloc)
+table("Laundry as secondary"= MTUSW6UK_m$laundry_s == 1, MTUSW6UK_m$eloc)
+# create a frame to hold the various results
+# NB the value of the column (x) is meaningless
+# Skeleton results frame with one row per half hour. aggregate() is only used
+# to obtain the distinct s_halfhour values; the aggregated column is thrown away
+laundry_fr <- aggregate(MTUSW6UK_m$year, by = list(s_halfhour = MTUSW6UK_m$s_halfhour), FUN = mean)
+laundry_fr <- laundry_fr["s_halfhour"]
+# For each indicator: count laundry episodes per half hour, then express each
+# half hour as a share of all laundry episodes of that type
+# primary
+laundry_p_tod <- setNames(
+  aggregate(MTUSW6UK_m$laundry_p, by = list(MTUSW6UK_m$s_halfhour), FUN = sum),
+  c("s_halfhour", "freq"))
+laundry_fr$p_laundry_pr <- laundry_p_tod$freq / sum(laundry_p_tod$freq)
+# secondary
+laundry_s_tod <- setNames(
+  aggregate(MTUSW6UK_m$laundry_s, by = list(MTUSW6UK_m$s_halfhour), FUN = sum),
+  c("s_halfhour", "freq"))
+laundry_fr$s_laundry_pr <- laundry_s_tod$freq / sum(laundry_s_tod$freq)
+# all (primary or secondary)
+laundry_all_tod <- setNames(
+  aggregate(MTUSW6UK_m$laundry_all, by = list(MTUSW6UK_m$s_halfhour), FUN = sum),
+  c("s_halfhour", "freq"))
+laundry_fr$all_laundry_pr <- laundry_all_tod$freq / sum(laundry_all_tod$freq)
+# plot with primary & secondary for all years
+# direct graph to file
+# open a png device so the chart goes to file rather than the screen
+png(paste0(rpath, "/laundry-time-of-day-all-years.png"))
+# primary-act shares as a red line ...
+plot(laundry_fr$s_halfhour, laundry_fr$p_laundry_pr,
+     type = "l",
+     col = "red",
+     xlab = "Half Hour",
+     ylab = "% of laundry of that type")
+# ... with the secondary-act shares overlaid in the default colour
+points(laundry_fr$s_halfhour, laundry_fr$s_laundry_pr, type = "l")
+# cex = scaling factor
+legend("topright", legend = c("Primary act","Secondary act"), lty = 1, col = c("red", "black"), bty = "n", cex = 1)
+title(main = "% of laundry done at different times of day (all years)", cex = 0.75)
+# close the device to flush the file
+dev.off()
+# laundry for each year - how to loop over?
+# make subsets to speed things up
+# For each survey year: count laundry episodes per half hour and store each
+# half hour's share of that year's laundry episodes in laundry_fr.
+# NOTE(review): the shares are added to laundry_fr by position, which assumes
+# every half hour in the pooled data also occurs in the single-year subset -
+# confirm, otherwise the column assignment fails or misaligns.
+# 1974
+MTUSW6UK_m1974 <- subset(MTUSW6UK_m,survey==1974)
+# primary
+laundry_tod_1974p <- aggregate(MTUSW6UK_m1974$laundry_p, by=list(MTUSW6UK_m1974$s_halfhour), FUN=sum)
+names(laundry_tod_1974p) <- c("s_halfhour","freq") 
+laundry_fr$laundry_p_1974_pr <- (laundry_tod_1974p$freq/sum(laundry_tod_1974p$freq))
+# secondary
+laundry_tod_1974s <- aggregate(MTUSW6UK_m1974$laundry_s, by=list(MTUSW6UK_m1974$s_halfhour), FUN=sum)
+names(laundry_tod_1974s) <- c("s_halfhour","freq") 
+laundry_fr$laundry_s_1974_pr <- (laundry_tod_1974s$freq/sum(laundry_tod_1974s$freq))
+# all (primary or secondary). This previously summed laundry_s by mistake, so
+# the 1974 "all" series was just a copy of the secondary series
+laundry_tod_1974all <- aggregate(MTUSW6UK_m1974$laundry_all, by=list(MTUSW6UK_m1974$s_halfhour), FUN=sum)
+names(laundry_tod_1974all) <- c("s_halfhour","freq") 
+laundry_fr$laundry_all_1974_pr <- (laundry_tod_1974all$freq/sum(laundry_tod_1974all$freq))
+# 2005
+MTUSW6UK_m2005 <- subset(MTUSW6UK_m,survey==2005)
+# primary
+laundry_tod_2005p <- aggregate(MTUSW6UK_m2005$laundry_p, by=list(MTUSW6UK_m2005$s_halfhour), FUN=sum)
+names(laundry_tod_2005p) <- c("s_halfhour","freq") 
+laundry_fr$laundry_p_2005_pr <- (laundry_tod_2005p$freq/sum(laundry_tod_2005p$freq))
+# secondary
+laundry_tod_2005s <- aggregate(MTUSW6UK_m2005$laundry_s, by=list(MTUSW6UK_m2005$s_halfhour), FUN=sum)
+names(laundry_tod_2005s) <- c("s_halfhour","freq") 
+laundry_fr$laundry_s_2005_pr <- (laundry_tod_2005s$freq/sum(laundry_tod_2005s$freq))
+# all (primary or secondary)
+laundry_tod_2005all <- aggregate(MTUSW6UK_m2005$laundry_all, by=list(MTUSW6UK_m2005$s_halfhour), FUN=sum)
+names(laundry_tod_2005all) <- c("s_halfhour","freq") 
+laundry_fr$laundry_all_2005_pr <- (laundry_tod_2005all$freq/sum(laundry_tod_2005all$freq))
+# now compare laundry for 1974 & 2005
+# must be a simple way to loop over these
+# direct graph to file
+# primary episodes
+# Each chart: 1974 shares as red circles, 2005 shares as blue triangles,
+# written straight to a png file in rpath
+# primary episodes
+png(paste0(rpath, "/laundry-time-of-day-1974-2005-primary.png"))
+plot(laundry_fr$s_halfhour, laundry_fr$laundry_p_1974_pr,
+     col = "red",
+     pch = 1,
+     xlab = "Half Hour",
+     ylab = "Proportion of laundry of that type")
+points(laundry_fr$s_halfhour, laundry_fr$laundry_p_2005_pr, pch = 2, col = "blue")
+# cex = scaling factor
+legend("topright", legend = c("Primary act 1974","Primary act 2005"),
+       pch = c(1, 2), col = c("red", "blue"), cex = 1)
+title(main = "% of laundry done at different times of day (1974-2005)", cex = 0.75)
+dev.off()
+# secondary episodes
+png(paste0(rpath, "/laundry-time-of-day-1974-2005-secondary.png"))
+plot(laundry_fr$s_halfhour, laundry_fr$laundry_s_1974_pr,
+     col = "red",
+     pch = 1,
+     xlab = "Half Hour",
+     ylab = "Proportion of laundry of that type")
+points(laundry_fr$s_halfhour, laundry_fr$laundry_s_2005_pr, pch = 2, col = "blue")
+# cex = scaling factor
+legend("topright", legend = c("Secondary act 1974","Secondary act 2005"),
+       pch = c(1, 2), col = c("red", "blue"), cex = 1)
+title(main = "% of laundry done at different times of day (1974-2005)", cex = 0.75)
+dev.off()
+# all laundry episodes
+png(paste0(rpath, "/laundry-time-of-day-1974-2005-all.png"))
+plot(laundry_fr$s_halfhour, laundry_fr$laundry_all_1974_pr,
+     col = "red",
+     pch = 1,
+     xlab = "Half Hour",
+     ylab = "Proportion of laundry of that type")
+points(laundry_fr$s_halfhour, laundry_fr$laundry_all_2005_pr, pch = 2, col = "blue")
+# cex = scaling factor
+legend("topright", legend = c("All laundry 1974","All laundry 2005"),
+       pch = c(1, 2), col = c("red", "blue"), cex = 1)
+title(main = "% of laundry done at different times of day (1974-2005)", cex = 0.75)
+dev.off()
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis.R
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis.R
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis.Rmd
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis.Rmd
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis.html
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis.html
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis.md
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis.md
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis_files/figure-html/analyseMtusEpisodes-1.png
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis_files/figure-html/analyseMtusEpisodes-1.png
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis_files/figure-html/analyseMtusEpisodes-2.png
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis_files/figure-html/analyseMtusEpisodes-2.png
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis_files/figure-html/analyseMtusEpisodes-3.png
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis_files/figure-html/analyseMtusEpisodes-3.png
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis_files/figure-html/analyseMtusEpisodes-4.png
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis_files/figure-html/analyseMtusEpisodes-4.png
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis_files/figure-html/analyseMtusEpisodes-5.png
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-analysis_files/figure-html/analyseMtusEpisodes-5.png
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-data-processing.R
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-data-processing.R
--- a/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-gender-over-time.R
+++ b/Theme-1/laundryPaper/DEMAND-BA-MTUS-W6-Laundry-gender-over-time.R
+# Begin header ###########################################
+# Use MTUS World 6 time-use data (UK subset) to examine:
+# - distributions of laundry in 1985 -> 2005
+# - for discussions with @tulliajack re comparisons with Sweden
+# Data source: www.timeuse.org/mtus
+# data already in long format (but episodes) processed using
+# DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-v2.0-adult-data-processing.R
+# This work was funded by RCUK through the End User Energy Demand Centres Programme via the
+# "DEMAND: Dynamics of Energy, Mobility and Demand" Centre (www.demand.ac.uk, gow.epsrc.ac.uk/NGBOViewGrant.aspx?GrantRef=EP/K011723/1)
+#     Copyright (C) 2014  University of Southampton
+#     Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut)
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License 
+# (http://choosealicense.com/licenses/gpl-2.0/), or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# End header ###########################################
+# MTUS codes of interest: Main/Sec21 Laundry, ironing, clothing repair
+# Defined as:
+# 1983/4/7: 
+# <- 0701 Wash clothes, hang out / bring in washing
+# 	0702 Iron clothes
+# 	0801 Repair, upkeep of clothes
+# => may over-estimate laundry
+# 1995	14 Clothes
+# => may over-estimate laundry
+# 2000	3300 Unspecified making and care for textiles
+#       3310 Laundry
+#       3320 Ironing
+#       3390 Other specified making and care for textiles
+# => may over-estimate laundry
+# 2005	Pact=7 (washing clothes)
+# Housekeeping ----
+# clear out all old objects etc to avoid confusion
+rm(list = ls()) # wipes every object in the current workspace
+# data locations and file names used by the loader functions below
+tudpath <- "~/Documents/Work/Data/MTUS/World_6/processed/" # presume processed data is already in here
+epsfile <- "gMTUSW6UKdiaryEps_DT.csv" # need to change to processed csv. NOTE(review): loadMtusEpisodes() pipes this through `gunzip -c` although the name has no .gz suffix - confirm
+survfile <- "gMTUSW6UKsurveyCore_DT.csv.gz" # gzipped survey file read by loadCoreMtusSurvey()
+rpath <- "~/Documents/Work/Projects/RCUK_DEMAND/Theme 1/results/MTUS/" # results folder
+# the MTUS activity label that identifies laundry in the main/sec columns
+laundry <- "laundry, ironing, clothing repair"
+# Generic functions ----
+# load packages
+loadPackages <- function() {
+  # Attach every package the analysis relies on, in a fixed order.
+  # Takes no arguments; returns (invisibly) the value of the last library() call.
+  pkgs <- c(
+    "foreign",    # as loading stata files
+    "lattice",    # fancy graphs
+    "ggplot2",    # fancy graphs II
+    "data.table", # why use anything else?
+    "survey",     # weighted survey analysis
+    "car",        # regression diagnostics
+    "gmodels",    # nice crosstabs
+    "broom",      # turns stats objects into dataframes - useful for table output
+    "fasttime"    # VERY fast time string conversion to POSIXct
+    # but only IF the input string is in a fixed format - see http://rforge.net/doc/packages/fasttime/fastPOSIXct.html
+  )
+  attached <- NULL
+  for (pkg in pkgs) {
+    # character.only = TRUE because the package name is held in a variable
+    attached <- library(pkg, character.only = TRUE)
+  }
+  invisible(attached)
+}
+# Feedback function - cos I can't be bothered to keep writing it out
+feedBack <- function(string) {
+  # Print `string` to the console with a "Feedback: " prefix.
+  # Returns the prefixed message (invisibly, as print() does).
+  msg <- paste0("Feedback: ", string)
+  print(msg)
+}
+# Functions for loading pre-processed data
+loadCoreMtusSurvey <- function() {
+  # Load the gzipped core survey file (tudpath + survfile) into the global
+  # data.table gMTUSW6UKsurveyCore_DT. Takes no arguments.
+  gunzipCmd <- paste0("gunzip -c ", tudpath, survfile)
+  print(paste0("Loading via ", gunzipCmd))
+  # Hand fread the gunzip command so the decompressed file is piped straight in:
+  # a lot faster than unzipping to disk first, and fread still reports progress.
+  # `<<-` is deliberate - the table has to outlive this function
+  gMTUSW6UKsurveyCore_DT <<- fread(gunzipCmd, stringsAsFactors = FALSE)
+  feedBack("Done loading TU survey data")
+} # works
+loadMtusEpisodes <- function() {
+  # Load the diary episode file (tudpath + epsfile) into the global data.table
+  # gMTUSW6UKdiaryEps_DT. Takes no arguments.
+  # NOTE(review): epsfile has no .gz suffix but is still piped through
+  # `gunzip -c` - confirm the file on disk really is gzipped
+  cmd <- paste0("gunzip -c ", tudpath, epsfile)
+  print(paste0("Loading via ", cmd))
+  # read in the gzipped file using gunzip to 'pipe' the file to fread
+  # this is a lot faster than non-piped gunzip then fread and you get feedback from fread
+  # Don't forget to globalise!
+  gMTUSW6UKdiaryEps_DT <<- fread(cmd, 
+                                   stringsAsFactors = FALSE) 
+  # the message used to say "survey data" (copy-paste from loadCoreMtusSurvey),
+  # which made the two loaders indistinguishable in the console log
+  feedBack("Done loading TU episode data")
+}
+# Controller
+loadPackages()
+loadCoreMtusSurvey()
+loadMtusEpisodes()
+# key both tables on diarypid so they can be joined on it below
+setkey(gMTUSW6UKdiaryEps_DT, diarypid)
+setkey(gMTUSW6UKsurveyCore_DT, diarypid)
+# 1/0 flags per episode: laundry as main act, as secondary act, as either
+set(gMTUSW6UKdiaryEps_DT, j = "laundry_p",
+    value = ifelse(gMTUSW6UKdiaryEps_DT$main == laundry, 1, 0))
+set(gMTUSW6UKdiaryEps_DT, j = "laundry_s",
+    value = ifelse(gMTUSW6UKdiaryEps_DT$sec == laundry, 1, 0))
+set(gMTUSW6UKdiaryEps_DT, j = "laundry_all",
+    value = ifelse(gMTUSW6UKdiaryEps_DT$main == laundry | gMTUSW6UKdiaryEps_DT$sec == laundry, 1, 0))
+# join the episodes to the survey rows flagged "good case", so only episodes
+# belonging to good cases are kept
+gMTUSW6UKdiaryEpsMerged_DT <- gMTUSW6UKdiaryEps_DT[
+  gMTUSW6UKsurveyCore_DT[gMTUSW6UKsurveyCore_DT$badcase == "good case"]
+]
+# checks on the merged episode data: sex and badcase frequencies (NA counts always shown) and the spread of the weight
+with(gMTUSW6UKdiaryEpsMerged_DT,
+     table(sex, useNA = c("always"))
+)
+with(gMTUSW6UKdiaryEpsMerged_DT,
+     table(badcase, useNA = c("always"))
+)
+with(gMTUSW6UKdiaryEpsMerged_DT,
+     summary(propwt)
+)
+svygMTUSW6UKdiaryEpsMerged_DT <- svydesign(ids = ~diarypid, # episodes sharing a diarypid form one cluster
+                                 weight = ~propwt, # `weight` partially matches svydesign()'s `weights` argument
+                                 data = gMTUSW6UKdiaryEpsMerged_DT # all data
+) # does not produce a data table
+# weighted mean duration (time) of laundry episodes, by survey and sex
+svyby(~time, # the data to summarise
+      ~ba_survey + sex, # the row * columns we want
+      svygMTUSW6UKdiaryEpsMerged_DT[
+        svygMTUSW6UKdiaryEpsMerged_DT$variables$laundry_all == 1], # the data in survey form, restricted to laundry episodes
+      svymean # the function to use to summarise
+)
+# weighted total duration (time) of laundry episodes, by survey and sex
+svyby(~time, # the data to summarise
+      ~ba_survey + sex, # the row * columns we want
+      svygMTUSW6UKdiaryEpsMerged_DT[
+        svygMTUSW6UKdiaryEpsMerged_DT$variables$laundry_all == 1], # the data in survey form, restricted to laundry episodes
+      svytotal # the function to use to summarise
+)
+laundrySummaryByPersonDTp <- gMTUSW6UKdiaryEpsMerged_DT[laundry_p == 1, # episodes with laundry as main act
+                                                     .(
+                                                       laundry_p_minutes = sum(time) # summed episode time per diarypid
+                                                     ), 
+                                                     by = .(
+                                                       diarypid
+                                                     )
+                                                     ]
+setkey(laundrySummaryByPersonDTp, diarypid)
+laundrySummaryByPersonDTs <- gMTUSW6UKdiaryEpsMerged_DT[laundry_s == 1, # episodes with laundry as secondary act
+                                                        .(
+                                                          laundry_s_minutes = sum(time) # summed episode time per diarypid
+                                                        ), 
+                                                        by = .(
+                                                          diarypid
+                                                        )
+                                                        ]
+setkey(laundrySummaryByPersonDTs, diarypid)
+laundrySummaryByPersonDT <- merge(laundrySummaryByPersonDTp,
+                                  laundrySummaryByPersonDTs, all = TRUE) # keep all: a diarypid present in only one table gets NA in the other column
+laundrySummaryByPersonDT$laundry_p_minutes <- ifelse( # NA (no primary laundry) -> 0 minutes
+  is.na(laundrySummaryByPersonDT$laundry_p_minutes),
+  0,
+  laundrySummaryByPersonDT$laundry_p_minutes
+  )
+laundrySummaryByPersonDT$laundry_s_minutes <- ifelse( # NA (no secondary laundry) -> 0 minutes
+  is.na(laundrySummaryByPersonDT$laundry_s_minutes),
+  0,
+  laundrySummaryByPersonDT$laundry_s_minutes
+  )
+laundrySummaryByPersonDT$total_laundry <- laundrySummaryByPersonDT$laundry_p_minutes + # primary + secondary
+  laundrySummaryByPersonDT$laundry_s_minutes
+# merge back to survey data (adds the survey columns for each diarypid)
+laundrySummaryByPersonDT <- merge(laundrySummaryByPersonDT,
+                                  gMTUSW6UKsurveyCore_DT) # keep matches
+print("Set survey data")
+# tell survey that the diarypids are the ids (they repeat)
+svyLaundrySummaryByPersonDT <- svydesign(ids = ~diarypid, 
+                                             weight = ~propwt, # `weight` partially matches svydesign()'s `weights` argument
+                                             data = laundrySummaryByPersonDT # laundry only
+                                           ) # does not produce a data table
+# reporting laundry by gender?
+# any laundry: weighted count by survey and sex (the table only holds diarypids with at least one laundry episode)
+svytable(~ba_survey + sex , # the row * columns we want
+         svyLaundrySummaryByPersonDT # the data in survey form
+)
+# mean total time spent on laundry (primary)
+# XX not correct? XX  NOTE(review): the design only contains diarypids with at least one laundry episode, so this is a mean among those who did laundry, not across all respondents - confirm that is intended
+svyby(~laundry_p_minutes, # the data to summarise
+      ~ba_survey + sex, # the row * columns we want
+      svyLaundrySummaryByPersonDT, # the data in survey form
+      svymean, # the function to use to summarise
+      na.rm = TRUE
+)
+# mean total time spent on laundry (secondary)
+svyby(~laundry_s_minutes, # the data to summarise
+      ~ba_survey + sex, # the row * columns we want
+      svyLaundrySummaryByPersonDT, # the data in survey form
+      svymean, # the function to use to summarise
+      na.rm = TRUE
+)
--- a/Theme-1/laundryPaper/DEMAND-BA-ons-2005-laundry-data-exploration.R
+++ b/Theme-1/laundryPaper/DEMAND-BA-ons-2005-laundry-data-exploration.R
+# Header ###########################################
+# Time Use data analysis for 'Laundry' paper  
+#
+# Use ONS UK Time Use Survey 2005 to examine:
+# - distributions of laundry in 2005
+#
+# Data source: http://discover.ukdataservice.ac.uk/catalogue/?sn=5592
+# Data already in long format (but 10 minute slots)
+#
+# This work was funded by RCUK through the End User Energy Demand Centres Programme via the
+# "DEMAND: Dynamics of Energy, Mobility and Demand" Centre (www.demand.ac.uk, gow.epsrc.ac.uk/NGBOViewGrant.aspx?GrantRef=EP/K011723/1)
+#
+# Copyright (C) 2014  University of Southampton
+# 
+# Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
+# [Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
+# 
+# The MIT License (MIT) applies - see https://github.com/dataknut/
+#
+# end header
+# To do: -----------------------------------------------------------------
+# Prelims -----------------------------------------------------------------
+# clear out all old objects etc to avoid confusion
+rm(list = ls()) 
+# set time
+starttime <- proc.time()
+# load required packages
+# foreign - not needed if we are loading csv files
+packagel <- c("ggplot2","plyr")
+# Only install what is actually missing: an unconditional install.packages()
+# re-downloads on every run and fails without internet access even when the
+# packages are already present
+missing_pkgs <- packagel[!vapply(packagel, requireNamespace, logical(1), quietly = TRUE)]
+if (length(missing_pkgs) > 0) {
+  install.packages(missing_pkgs)
+}
+# library() stops with an error if a package cannot be attached, whereas
+# require() just returns FALSE and lets the script fail later on
+invisible(lapply(packagel, library, character.only = TRUE))
+# path to data & results
+# where's the data?
+dpath <- "~/Documents/Work/Data/Social Science Datatsets/Time Use 2005/processed/"
+# where do you want the results to go?
+# NB: trailing "/" is required - the output names are built with
+# paste0(rpath, "<file>"), so without it the file name was glued onto the
+# folder name ("...ONS TU 2005all_acts_by_location-table.csv")
+rpath <- "~/Documents/Work/Projects/RCUK-DEMAND/Theme 1/results/ONS TU 2005/"
+# time axis definition
+# can't get this to work!
+# halfhourlab <- "\"04:00\",\"06:00\", \"08:00\", \"10:00\", \"12:00\", \"14:00\", \"16:00\",\"18:00\",\"20:00\",\"22:00\""
+# Load long form data -----------------------------------------------------------------
+# Time use data in long form - this has data in 10 minute time 'slots'
+# It also has a few survey variables attached to each time use slot
+tu2005data <- read.csv(paste0(dpath,"UK-2005-TU-merged-long-reduced.csv"))
+# Now stop to check what's in it and make sure we understand the format!
+head(tu2005data)
+# frequency table of main acts (the things people reported doing). NOTE(review): despite the object name, only pact is tabulated here - location is not included
+all_acts_by_location <- table("Main acts"= tu2005data$pact)
+# output to a csv file so we can keep for reference (useful later)
+write.csv(all_acts_by_location, paste0(rpath,"all_acts_by_location-table.csv"), row.names=FALSE, na="")
+# check values of months variable (so we see that seasons are represented)
+table("Month"= tu2005data$t_month)
+# recode month so it is easier to interpret. NOTE(review): presumably t_month is read as a number; the first assignment turns the column into text, and the later == comparisons still match because the number is compared as a string
+tu2005data$t_month[tu2005data$t_month == 2] <- "February"
+tu2005data$t_month[tu2005data$t_month == 6] <- "June"
+tu2005data$t_month[tu2005data$t_month == 9] <- "September"
+tu2005data$t_month[tu2005data$t_month == 11] <- "November"
+# set the order of the month factor (any value not listed in levels becomes NA)
+tu2005data$t_month <- factor(tu2005data$t_month, 
+  levels = c("February","June","September","November"))
+# re-check
+table("Month"= tu2005data$t_month)
+# check the days
+table("Days"= tu2005data$s_dow)
+# Out of order!
+# set the order of the dow factor so tables and plots run Monday -> Sunday
+tu2005data$s_dow <- factor(tu2005data$s_dow, 
+  levels = c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"))
+# recheck
+table("Days"= tu2005data$s_dow)
+# Test: all acts -----------------------------------------------------------------
+# add a dummy variable we can count
+# constant column: summing it within a group counts that group's rows
+tu2005data$count <- 1
+# number of records for each start time x primary activity combination
+all_acts <- ddply(tu2005data, c("s_starttime", "pact"), summarise, count = sum(count))
+# line chart - one line per activity, far too many to be readable
+all_acts_lplot <- ggplot(all_acts, aes(x = s_starttime, y = count, colour = pact, group = pact)) +
+  geom_line()
+all_acts_lplot +
+  labs(x = "Time of Day", y = "N reporting", colour = "Activity") +
+  scale_x_discrete(breaks = c("00:00","04:00","08:00","12:00","16:00","20:00","23:30"),
+                   labels = c("00:00","04:00","08:00","12:00","16:00","20:00","23:30")) +
+  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
+        legend.position = "right")
+# save the plot (ggsave writes the most recent plot when none is given)
+ggsave(paste0(rpath,"all_acts_tod_lineplot.pdf"), width = 12, height = 8, units = "cm", dpi = 300)
+# the same counts as a stacked area chart - equally unreadable
+all_acts_stplot <- ggplot(all_acts, aes(x = s_starttime, y = count, fill = pact, group = pact)) +
+  geom_area()
+all_acts_stplot +
+  labs(x = "Time of Day", y = "N reporting", fill = "Activity") +
+  scale_x_discrete(breaks = c("00:00","04:00","08:00","12:00","16:00","20:00","23:30"),
+                   labels = c("00:00","04:00","08:00","12:00","16:00","20:00","23:30")) +
+  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
+        legend.position = "right")
+# save the plot
+ggsave(paste0(rpath,"all_acts_tod_stackedplot.pdf"), width = 12, height = 8, units = "cm", dpi = 300)
+# What we really want to do is to create a new column with primary acts where:
+# - all travel is collapsed to 1 code
+# - the '/' are removed to make graph saving easier later
+# Something like:
+# primary act: text copy, "/" spelled out as " or ", and a flag for any
+# activity whose label mentions travel
+tu2005data$pact_c <- as.character(tu2005data$pact)
+tu2005data$pact_nc <- gsub("/", " or ", tu2005data$pact_c)
+tu2005data$pact_t <- grepl("travel", tu2005data$pact_c)
+# secondary act: same treatment
+tu2005data$sact_c <- as.character(tu2005data$sact)
+tu2005data$sact_nc <- gsub("/", " or ", tu2005data$sact_c)
+tu2005data$sact_t <- grepl("travel", tu2005data$sact_c)
+# collapse every travel activity into the single code "travel"
+tu2005data$pact_nc <- replace(tu2005data$pact_nc, tu2005data$pact_t, "travel")
+tu2005data$sact_nc <- replace(tu2005data$sact_nc, tu2005data$sact_t, "travel")
+# back to factors for tabulating and plotting
+tu2005data$pact_nf <- as.factor(tu2005data$pact_nc)
+tu2005data$sact_nf <- as.factor(tu2005data$sact_nc)
+table(tu2005data$pact_nf)
+# stacked chart again, now with the collapsed codes (fewer categories, still a lot!)
+all_acts_nf <- ddply(tu2005data, c("s_starttime", "pact_nf"), summarise, count = sum(count))
+all_acts_stplotn <- ggplot(all_acts_nf, aes(x = s_starttime, y = count, fill = pact_nf, group = pact_nf)) +
+  geom_area()
+all_acts_stplotn +
+  labs(x = "Time of Day", y = "N reporting", fill = "Activity") +
+  scale_x_discrete(breaks = c("00:00","04:00","08:00","12:00","16:00","20:00","23:30"),
+                   labels = c("00:00","04:00","08:00","12:00","16:00","20:00","23:30")) +
+  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
+        legend.position = "right")
+# slightly better, but still fairly illegible - further grouping required!!
+# save the plot
+ggsave(paste0(rpath,"all_acts_nf_tod_stackedplot.pdf"), width = 12, height = 8, units = "cm", dpi = 300)
+# Practices: Laundry -----------------------------------------------------------------
+# Interesting of itself but we also want to try to compare the results with the HES data
+# set our y axis label
+# used to build the "% reporting ..." axis / legend titles
+ylabt <- "laundry"
+tu2005data$laundry_all <- 0
+# we're interested in laundry at home (for now!): flag a slot when washing
+# clothes is the primary or the secondary act and the location is not "elsewhere"
+tu2005data$laundry_all[(tu2005data$pact == "washing clothes" |
+                          tu2005data$sact == "washing clothes") &
+                         tu2005data$lact != "elsewhere"] <- 1 
+# make the table: per day of week and half hour, the number of slots (n), the
+# proportion flagged as laundry (pc, on a 0-1 scale) and its sd
+laundry <- ddply(tu2005data, c("s_dow", "s_halfhour"), summarise, 
+                 n=sum(count),
+                 pc=mean(laundry_all),
+                 sd=sd(laundry_all))
+# CI for propn
+# +/- (1.96 * sqrt(p*(1-p)/n))
+laundry$se <- sqrt(laundry$pc*(1-laundry$pc)/laundry$n)
+laundry$ci <- 1.96 * laundry$se
+# plot it
+# pc and ci stay as proportions in the table; they are multiplied by 100 only
+# for display, because the axis is labelled "% reporting" (and the age / HES
+# charts already plot 100*mean) - previously proportions were drawn under a % label
+laundry_plot <- ggplot(laundry, aes(x=s_halfhour, y=100*pc, colour=s_dow, group=s_dow)) + geom_line()
+laundry_plot + xlab("Time of Day") + 
+  ylab(paste("% reporting", ylabt)) + 
+  labs(colour="Day of the week") +
+  scale_x_discrete(breaks=c("00:00","04:00","08:00","12:00","16:00","20:00","23:30"),
+                   labels=c("00:00","04:00","08:00","12:00","16:00","20:00","23:30")) +
+  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=1)) +
+  facet_wrap( ~ s_dow) +
+  geom_errorbar(aes(ymin=100*(pc-ci), ymax=100*(pc+ci)), width=.2)
+# save the plot
+ggsave(paste0(rpath,"laundry_ci_tod_dow_plot.pdf"), width=12, height=8, unit="cm", dpi=300)
+# try a contour plot/heat map to make day of the week easier to see
+# (fill is also shown in %, to match its legend title)
+laundry_hmplot <- ggplot(laundry, aes(x=s_halfhour, y=s_dow, fill=100*pc))
+laundry_hmplot + geom_raster() + xlab("Time of Day") + 
+  ylab("Day of week") +
+  labs(fill=paste("% reporting", ylabt)) +
+  theme(legend.position="bottom") +
+  scale_x_discrete(breaks=c("00:00","04:00","08:00","12:00","16:00","20:00","23:30"),
+                   labels=c("00:00","04:00","08:00","12:00","16:00","20:00","23:30")) +
+  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=1))
+# save the plot
+ggsave(paste0(rpath,"laundry_tod_dow_hmplot.pdf"), width=12, height=8, unit="cm", dpi=300)
+# now try by age group to analyse differences
+# % of slots flagged as laundry, per age group and half hour
+laundry_age <- ddply(tu2005data, c("agegrp", "s_halfhour"), summarise, pc = 100 * mean(laundry_all))
+# one line per age group
+laundry_plot <- ggplot(laundry_age, aes(x = s_halfhour, y = pc, colour = agegrp, group = agegrp)) +
+  geom_line()
+laundry_plot +
+  labs(x = "Time of Day", y = paste("% reporting", ylabt), colour = "Age group") +
+  scale_x_discrete(breaks = c("00:00","04:00","08:00","12:00","16:00","20:00","23:30"),
+                   labels = c("00:00","04:00","08:00","12:00","16:00","20:00","23:30")) +
+  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))
+# save the plot
+ggsave(paste0(rpath,"laundry_tod_by_age_plot.pdf"), width = 12, height = 8, units = "cm", dpi = 300)
+# working status
+# per working status, half hour and day of week: number of slots (n), the
+# proportion flagged as laundry (pc, 0-1 scale) and its sd
+laundry_wrk <- ddply(tu2005data, c("wrking", "s_halfhour", "s_dow"), summarise, 
+                     n=sum(count),
+                     pc=mean(laundry_all),
+                     sd=sd(laundry_all))
+# CI for propn
+# +/- (1.96 * sqrt(p*(1-p)/n))
+# These must come from laundry_wrk itself. They used to be computed from the
+# day-of-week table `laundry`, which has different rows, so the error bars
+# belonged to other cells (or were recycled / failed on a length mismatch)
+laundry_wrk$se <- sqrt(laundry_wrk$pc*(1-laundry_wrk$pc)/laundry_wrk$n)
+laundry_wrk$ci <- 1.96 * laundry_wrk$se
+# pc and ci are scaled by 100 for display only, since the axis is labelled
+# "% reporting"
+laundry_plot <- ggplot(laundry_wrk, aes(x=s_halfhour, y=100*pc, colour=wrking, group=wrking)) + geom_line()
+laundry_plot + xlab("Time of Day") + 
+  ylab(paste("% reporting", ylabt)) + 
+  labs(colour="Working status") +
+  scale_x_discrete(breaks=c("00:00","04:00","08:00","12:00","16:00","20:00","23:30"),
+                   labels=c("00:00","04:00","08:00","12:00","16:00","20:00","23:30")) +
+  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=1)) +
+  geom_errorbar(aes(ymin=100*(pc-ci), ymax=100*(pc+ci)), width=.2) +
+  facet_wrap( ~ s_dow) +
+  theme(legend.position=c(0.5,0.2))
+# save the plot
+ggsave(paste0(rpath,"laundry_tod_by_working_plot.pdf"), width=12, height=8, unit="cm", dpi=300)
+# To compare with the HES data on 'washing/drying' we need to create a table by weekend ('holiday') vs weekday
+# And it needs to have 10 minute time slots as the HES data is in 10 minute chunks
+# Saturday and Sunday -> "Weekend", every other slot -> "Weekday"
+tu2005data$weekend <- ifelse(tu2005data$s_dow %in% c("Saturday", "Sunday"), "Weekend", "Weekday")
+# check the recode against the original day of week
+table(tu2005data$s_dow, tu2005data$weekend)
+# % of slots flagged as laundry per weekday/weekend and 10 minute start time
+laundry_hes <- ddply(tu2005data, c("weekend", "s_starttime"), summarise, pc = 100 * mean(laundry_all))
+laundry_hes_plot <- ggplot(laundry_hes, aes(x = s_starttime, y = pc, colour = weekend, group = weekend)) +
+  geom_line()
+laundry_hes_plot +
+  labs(x = "Time of Day", y = paste("% reporting", ylabt), colour = "Weekday/Weekend") +
+  scale_x_discrete(breaks = c("00:00","04:00","08:00","12:00","16:00","20:00","23:30"),
+                   labels = c("00:00","04:00","08:00","12:00","16:00","20:00","23:30")) +
+  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))
+ggsave(paste0(rpath,"laundry_tod_hes_plot.pdf"), width = 12, height = 8, units = "cm", dpi = 300)
+# Export the table so it can be put on the same graph as the HES results:
+# -> csv with blank cells where na
+# NB this is long form - it could be switched to wide form to make that easier
+write.csv(laundry_hes, file = paste0(rpath,"laundry_tod_hes_compare_data.csv"), row.names = FALSE, na = "")
+print("Done!")
+# stop clock - how long did that take?
+proc.time() - starttime
\ No newline at end of file
--- a/Theme-1/laundryPaper/LICENSE
+++ b/Theme-1/laundryPaper/LICENSE
+GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+                            Preamble
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+  The precise terms and conditions for copying, distribution and
+modification follow.
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+                            NO WARRANTY
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+                     END OF TERMS AND CONDITIONS
+            How to Apply These Terms to Your New Programs
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+    {description}
+    Copyright (C) {year}  {fullname}
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+Also add information on how to contact you by electronic and paper mail.
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+  {signature of Ty Coon}, 1 April 1989
+  Ty Coon, President of Vice
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
--- a/Theme-1/laundryPaper/README.md
+++ b/Theme-1/laundryPaper/README.md
+# DEMAND: Dynamics of Energy, Mobility and Demand
+Unless otherwise indicated this work was funded by RCUK through the End User Energy Demand Centres Programme via the "DEMAND: Dynamics of Energy, Mobility and Demand" Centre:
+ * http://www.demand.ac.uk
+ * http://gtr.rcuk.ac.uk/project/0B657D54-247D-4AD6-9858-64E411D3D06C
+# DEMAND_Laundry
+Analysis for a paper on the changing practices of laundry using UK Time-Use data 1985-2005.
+Paper: https://eprints.soton.ac.uk/400478/
+### Terms of Use
+GPL: V2 - http://choosealicense.com/licenses/gpl-2.0/
+See license file for details.
+[YMMV](http://en.wiktionary.org/wiki/YMMV)
--- a/Theme-1/laundryPaper/old_stata/DEMAND-BA-Laundry-Change-Over-Time-v2.0-adult.do
+++ b/Theme-1/laundryPaper/old_stata/DEMAND-BA-Laundry-Change-Over-Time-v2.0-adult.do
--- a/Theme-1/laundryPaper/old_stata/DEMAND-BA-Laundry-Energy-Time-As-Submitted-v1.0.do
+++ b/Theme-1/laundryPaper/old_stata/DEMAND-BA-Laundry-Energy-Time-As-Submitted-v1.0.do
+*******************************************
+* Script to use a number of datasets to examine:
+* - distributions of laundry in 1975 & 2005
+* - changing laundry practices
+* uses:
+* - MTUS World 6 time-use data (www.timeuse.org/mtus UK subset) - data already in long format (but episodes)
+* - EFS 2005-6 to analyse uptake of washers/dryers
+* - SPRG water practices survey
+* This work was funded by RCUK through the End User Energy Demand Centres Programme via the
+* "DEMAND: Dynamics of Energy, Mobility and Demand" Centre (www.demand.ac.uk, gow.epsrc.ac.uk/NGBOViewGrant.aspx?GrantRef=EP/K011723/1)
+/*   
+Copyright (C) 2014  University of Southampton
+Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
+	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License 
+(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+#YMMV - http://en.wiktionary.org/wiki/YMMV
+*/
+* start from an empty session and never pause for user input
+clear all
+set more off
+* switch for the half-hour sampled analysis further down (1 = run it)
+local do_halfhour_samples 1
+* Edit the paths below to run this script on a different PC.
+* They are globals so that parts of the script can be re-run on their own.
+global where "~/Documents/Work"
+global droot "$where/Data/Social Science Datatsets"
+* input data: LCFS/EFS
+global efspath "$droot/Expenditure and Food Survey/processed"
+* input data: MTUS
+global mtuspath "$droot/MTUS/World 6/processed"
+* input data: SPRG
+global sprgpath "$where/Projects/ESRC-SPRG/WP4-Micro_water/data/sprg_survey/data/safe/v6"
+* output: project root and results folder
+global proot "$where/Projects/RCUK-DEMAND/Theme 1"
+global rpath "$proot/results/MTUS"
+* version tag, used in the log file name
+global version "v1.0-all-locs"
+* which subgroup of mtus are we interested in?
+* (used in a keep statement once the MTUS aggregate file is loaded)
+global mtusfilter "_all"
+* close any log left open by a previous run, then start a fresh one
+capture log close
+log using "$rpath/DEMAND-BA-MTUS-W6-Laundry-Change-Over-Time-$version-adult.smcl", replace
+**********
+* LCFS/EFS extract: tumble dryer uptake levels to 2005
+use "$efspath/EFS-2005-2006-extract-BA.dta", clear
+* list the variables whose name or label mentions tumble or weight
+lookfor tumble weight
+* a167 by year, weighted, with row percentages
+tabulate year a167 [iweight=weighta], row
+* 2005 only: a167 against each household characteristic, column percentages
+foreach v in c_nchild c_nearners c_empl {
+	tabulate a167 `v' if year == 2005 [iweight=weighta], column
+}
+**********
+* SPRG survey: laundry practices (the q27 items)
+use "$sprgpath/8369-clt-050312-v6-wf-safe.dta", clear
+describe q27*
+* move the sum out of the q27* namespace so the wildcard recode below skips it
+rename q27_sum sum_q27
+* items are coded 1 = yes, 2 = no -> make them 1/0
+recode q27* (2=0)
+* the weighted mean of a 1/0 item is the % who said yes to it
+summarize q27* [iweight=weight_respondent2], separator(0)
+* mean number of 'yes' responses
+summarize sum_q27 [iweight=weight_respondent2]
+* and its distribution
+tabulate sum_q27 [iweight=weight_respondent2]
+**********************************
+* MTUS activity codes of interest
+* 1974:	Main/Sec21 Laundry, ironing, clothing repair <- 50 Other essential domestic work (i.e. NOT preparing meals or routine housework)
+* 	so laundry in 1974 may be over-estimated
+* BUT 1975 is partly a 7 day diary - so more likely to detect laundry?
+* 2005:	Main/Sec21 Laundry, ironing, clothing repair <- Pact=7 (washing clothes)
+* Step 1: process the aggregate (survey) data
+use "$mtuspath/MTUS-adult-aggregate-UK-only-wf.dta", clear
+* bad cases are dropped
+keep if badcase == 0
+* declare the survey weights for the descriptives
+svyset [iweight=propwt]
+* All surveys are kept (not just 1974 & 2005) so birth cohort analysis is possible:
+* keep if survey == 1974 | survey == 2005
+* The main* variables are minutes per day, not episodes.
+* Compare 18 (Cooking), 20 (Cleaning) & 22 (maintain home/vehicle) with laundry (21)
+* seems to under-report laundry in 1974, esp for women?
+svy: mean main18 main20 main21 main22, over(survey sex)
+* keep whatever sample we define above
+keep $mtusfilter
+* number of diary days by hh type
+* svy: tab hhtype survey, col count
+* number of diary days by number of days covered
+* 1974 = 7 day diary
+svy: tabulate id survey, column count
+* keep only the variables we need, to keep the memory required low
+keep sex age main7 main21 hhtype empstat emp unemp student retired ///
+	propwt survey day month year ///
+	hhldsize famstat nchild *pid ba*
+* number of diary-days
+svy: tabulate survey, obs
+* keep a copy of the aggregate file: the merge and the two collapses below replace
+* the data in memory, and it is restored once this section is finished
+preserve
+*************************
+* sampled data
+* this requires the 10 minute sampling process implemented in 
+* https://github.com/dataknut/MTUS/blob/master/process-MTUS-W6-convert-to-X-min-samples-v1.0-adult.do
+* to have been run over the MTUS first with X set to 10
+if `do_halfhour_samples' {
+	* merge in the sampled data
+	* do analysis by collapsing 10 minute sampled data to half hours
+	merge 1:m diarypid using "$mtuspath/MTUS-adult-episode-UK-only-wf-10min-samples-long-v1.0.dta", ///
+		gen(m_aggvars)
+	* set up half-hour variable: round the start time down to :00 or :30
+	gen ba_hourt = hh(s_starttime)
+	gen ba_minst = mm(s_starttime)
+	gen ba_hh = 0 if ba_minst < 30
+	replace ba_hh = 30 if ba_minst > 29
+	gen ba_sec = 0
+	* sets date to 1969!
+	gen s_halfhour = hms(ba_hourt, ba_hh, ba_sec)
+	lab var s_halfhour "Episode starts during the half hour following"
+	format s_halfhour %tcHH:MM
+	* define laundry: 0/1 flags for main, secondary and either activity = 21
+	gen laundry_p = 0
+	lab var laundry_p "Main act = laundry (21)"
+	replace laundry_p = 1 if pact == 21
+	gen laundry_s = 0
+	lab var laundry_s "Secondary act = laundry (21)"
+	replace laundry_s = 1 if sact == 21
+	gen laundry_all = 0
+	replace laundry_all = 1 if laundry_p == 1 | laundry_s == 1
+	lab var laundry_all "Any act = laundry (21)"
+	* done at home or elsewhere?
+	tab survey eloc if laundry_all == 1 [iw=propwt],  mi
+	* a lot of 1974 done 'elsewhere'?
+	* this is the number of 10 minute samples by survey & day of the week
+	tab survey day [iw=propwt]
+	* check % of sampled X minute points which are laundry
+	* NB reporting frame longer in 1974 (30 mins) so may be higher frequency (e.g. interruption in 10-20 mins coded)
+	di "* main"
+	tab survey laundry_p [iw=propwt]
+	di "* secondary"
+	tab survey laundry_s [iw=propwt]
+	di "* all"
+	tab survey laundry_all [iw=propwt]
+	* which years could we use?
+	tab month survey [iw=propwt]
+	* 1974 = Feb, Mar & Aug,Sept -> has winter & summer
+	* 1984 = winter only
+	* 1987 = early summer only
+	* 1995 = May
+	* 2000 = all year
+	* 2005 = each season (March, June, Sept, Nov)
+	* keep 1974 & 2005 only
+	keep if survey == 1974 | survey == 2005
+	* check for duplicates
+	duplicates report diarypid ba_starttime
+	* none
+	duplicates report diarypid s_halfhour
+	* three -> because each s_halfhour value can stand for x:10 x:20 x:30
+	* collapse to add up the sampled laundry by half hour
+	* use the byvars we're interested in (or could re-merge with aggregated file)
+	collapse (sum) laundry_* (mean) propwt, by(diarypid pid survey day month year s_halfhour ///
+		ba_birth_cohort ba_age_r ba_nchild sex emp empstat nchild)
+	* because the different surveys have different reporting periods we need to just count at least 1 laundry in the half hour
+	* (re-attach the value labels to emp & empstat after the collapse)
+	lab val emp EMP
+	lab val empstat EMPSTAT
+	* any_laundry_X = 1 if the summed laundry_X for the half hour is above zero
+	local acts "p s all"
+	foreach a of local acts {
+		gen any_laundry_`a' = 0
+		replace any_laundry_`a' = 1 if laundry_`a' > 0
+	}
+	* the number of half hour data points by survey & day
+	tab survey day [iw=propwt]
+	svyset [iw=propwt]
+	* the distribution of laundry by survey and location
+	di "* primary"
+	svy: tab survey if any_laundry_p == 1, col ci
+	di "* secondary"
+	svy: tab survey if any_laundry_s == 1, col ci
+	di "* all"
+	svy: tab survey if any_laundry_all == 1, col ci
+	* by gender for all laundry reported
+	svy: tab survey sex if any_laundry_all == 1, ci row
+	* gender & age
+	svy: tab ba_age_r sex if any_laundry_all == 1 & survey == 1974, ci row
+	svy: tab ba_age_r sex if any_laundry_all == 1 & survey == 2005, ci row
+	* Separate days
+	table survey day [iw=propwt], by(any_laundry_all)
+	* days by gender
+	table survey day sex [iw=propwt], by(any_laundry_all)
+	* laundry by employment status if female
+	table survey day empstat if sex == 2 & any_laundry_all == 1 [iw=propwt]
+	* set time variable so can select by time & also tables should look nicer
+	xtset diarypid s_halfhour, delta(30 mins) format(%tcHH:MM)
+	di "* Tables for all days"
+	* All years, all days
+	table s_halfhour survey any_laundry_all [iw=propwt]
+	* days by half hour
+	table s_halfhour survey day [iw=propwt], by(any_laundry_all)	
+	* seasons
+	recode month (3 4 5 = 1 "Spring") (6 7 8 = 2 "Summer") (9 10 11 = 3 "Autumn") (12 1 2 = 4 "Winter"), gen(season)
+	* check
+	* tab month season
+	table s_halfhour survey season [iw=propwt], by(any_laundry_all)
+	* by half hour & employment status for women
+	table s_halfhour empstat survey if sex == 2 [iw=propwt], by(any_laundry_all)
+	*repeat by day for 2005
+	table s_halfhour empstat day if survey == 2005 & sex == 2 [iw=propwt], by(any_laundry_all)
+	* analysis by laundry type
+	* sunday morning
+	* only code for laundry within year
+	* every laundry half hour starts as 5 (other); later replaces overwrite earlier
+	* ones, so night-time (4) wins wherever the windows overlap
+	* NOTE(review): with Sunday coded as day 1, day > 1 & day < 6 selects days 2-5 only;
+	* if days run Sunday = 1 ... Saturday = 7 this leaves Friday (6) in 'Other' - confirm this is intended
+	gen laundry_timing = 5 if any_laundry_all == 1 // other
+	replace laundry_timing = 1 if any_laundry_all == 1 & day == 1 & tin(08:00, 12:00) // sunday morning
+	replace laundry_timing = 2 if any_laundry_all == 1 & day > 1 & day < 6 & tin(09:00, 12:00) // weekday morning
+	replace laundry_timing = 3 if any_laundry_all == 1 & day > 1 & day < 6 & tin(17:00, 20:00) // weekday evening peak
+	replace laundry_timing = 4 if any_laundry_all == 1 & tin(00:00, 01:30) // night-time
+	replace laundry_timing = 4 if any_laundry_all == 1 & tin(22:30, 23:30) // night-time
+	* one 0/1 indicator per timing type: laundry_timing_1 ... laundry_timing_5
+	tab laundry_timing, gen(laundry_timing_)
+	* check for missing	
+	table s_halfhour laundry_timing any_laundry_all, mi
+	* the Sunday label states the window actually coded above (08:00-12:00)
+	* NOTE(review): if 09:00 was intended for Sundays, change the tin() call above and this label together
+	lab def laundry_timing 1 "Sunday morning 08:00-12:00" 2 "Weekday morning 09:00-12:00" 3 "Weekday evening peak 17:00-20:00" 4 "Night-time 22:30-01:30" 5 "Other"
+	lab val laundry_timing laundry_timing
+	tab laundry_timing survey [iw=propwt], col
+	svy:tab laundry_timing survey, col ci
+	table laundry_timing ba_age_r survey [iw=propwt], col
+	table laundry_timing empstat survey [iw=propwt], col
+	table laundry_timing ba_nchild survey [iw=propwt], col
+	* collapse to single person record
+	* remember 1974/5 = 1 week diary
+	collapse (sum) laundry_timing_* any_laundry_all (mean) propwt, by(pid survey ///
+		ba_birth_cohort ba_age_r ba_nchild sex emp empstat nchild)
+	* turn the per-person counts back into 0/1 'ever did this' flags
+	recode any_laundry_all (1/max=1)
+	recode laundry_timing_1 (1/max=1)
+	recode laundry_timing_2 (1/max=1)
+	recode laundry_timing_3 (1/max=1)
+	recode laundry_timing_4 (1/max=1)
+	recode laundry_timing_5 (1/max=1)
+	*how many people are in multiple types?
+	egen nlaundry_types = rowtotal( laundry_timing_*)
+	svy: tab nlaundry_types survey, col
+	* what % of respondents in each?
+	svy: mean laundry_timing_*, over(survey)
+	* % of launderers
+	svy: mean laundry_timing_* if any_laundry_all == 1, over(survey)
+	* one logit per timing type (1-4) and survey year; estimates are stored as
+	* laundry_timing_<type>_<year>
+	foreach v of numlist 1/4 {
+		logit laundry_timing_`v' sex ib4.empstat i.ba_age_r i.ba_nchild if survey == 1974
+		est store laundry_timing_`v'_1974
+		logit laundry_timing_`v' sex ib4.empstat i.ba_age_r i.ba_nchild if survey == 2005
+		est store laundry_timing_`v'_2005
+	}
+	* export the models: one file per survey year, each holding that year's stored estimates
+	* (the 1974 file previously selected the *_2005 estimates)
+	estout laundry_*_1974 using "$rpath/laundry_type_1974_regressions.txt", cells("b ci_l ci_u se _star") stats(N r2_p chi2 p ll) replace
+	estout laundry_*_2005 using "$rpath/laundry_type_2005_regressions.txt", cells("b ci_l ci_u se _star") stats(N r2_p chi2 p ll) replace
+} 
+restore
+* The main survey aggregate file is back in memory from here on.
+* keep a single record per person (drops the repeat diaries), then do some basic stats
+duplicates drop pid, force
+* working age flag: 1 if older than 18 (OK, it should be 16 but...) unless
+* - a woman (sex == 2) older than 60, or
+* - a man (sex == 1) older than 65
+generate ba_working_age = age > 18 & !(age > 60 & sex == 2) & !(age > 65 & sex == 1)
+* check
+table ba_age_r ba_working_age sex
+* Proportion of women in work
+tabulate survey empstat if ba_working_age == 1 & sex == 2 [iweight=propwt], row
+display "Done!"
+log close
--- a/Theme-1/laundryPaper/old_stata/DEMAND-BA-Laundry-Energy-Time-As-Submitted-v1.1.do
+++ b/Theme-1/laundryPaper/old_stata/DEMAND-BA-Laundry-Energy-Time-As-Submitted-v1.1.do
--- a/Theme-1/laundryPaper/old_stata/DEMAND-BA-Laundry-Energy-Time-As-Submitted-v1.2.do
+++ b/Theme-1/laundryPaper/old_stata/DEMAND-BA-Laundry-Energy-Time-As-Submitted-v1.2.do
--- a/Theme-1/laundryPaper/old_stata/DEMAND-BA-Laundry-Energy-Time-As-Submitted-v1.3.do
+++ b/Theme-1/laundryPaper/old_stata/DEMAND-BA-Laundry-Energy-Time-As-Submitted-v1.3.do