diff --git a/Theme-1/changeOverTime/changingPeakDemandMtus1974_2014_v2.0.Rmd b/Theme-1/changeOverTime/changingPeakDemandMtus1974_2014_v2.0.Rmd index b8504c1abc858feb1125121a6e813074129745ef..5a1111069ba3a8d87caa1e160c86e5f8dcf3127d 100644 --- a/Theme-1/changeOverTime/changingPeakDemandMtus1974_2014_v2.0.Rmd +++ b/Theme-1/changeOverTime/changingPeakDemandMtus1974_2014_v2.0.Rmd @@ -67,11 +67,16 @@ source(paste0(projLoc,"/demandFunctions.R")) # > Local functions ---- # Functions not used elsewhere +makeHmsTime <- function(dt,timeChar ="halfHourChar"){ + # makes hms time as r_hmsTime using halfHourChar by default + dt <- dt[, r_hmsTime := hms::parse_hm(get(timeChar))] # create hms half hour for plotting etc + return(dt) +} + ba_createStackedPlot <- function(dt,fillVar,yVar,yLabel,facetVars){ # intended for testing overall distributions - ggplot(dt, aes(y = get(yVar), x = r_startHalfHour, fill = get(fillVar))) + # forcats::fct_rev() to reverse categories + ggplot(dt, aes(y = get(yVar), x = r_hmsTime, fill = get(fillVar))) + # forcats::fct_rev() to reverse categories geom_area(position = "stack") + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + theme(strip.text.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5)) + facet_grid(eval(facetVars)) + @@ -84,15 +89,14 @@ ba_createStackedPlot <- function(dt,fillVar,yVar,yLabel,facetVars){ ) } -ba_createGreyStackedPlot <- function(dt,fillVar,yVar,yLabel,facetVars){ +ba_createStepPlot <- function(dt,fillVar,yVar,yLabel,facetVars){ # intended for testing overall distributions - ggplot(dt, aes(y = get(yVar), x = r_startHalfHour, fill = get(fillVar))) + # forcats::fct_rev() to reverse categories - geom_area(position = "stack") + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + + ggplot(dt, aes(y = get(yVar), x = r_hmsTime, linetype = get(fillVar), colour = get(fillVar))) + # forcats::fct_rev() to reverse categories + geom_step() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + theme(strip.text.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5)) + facet_grid(eval(facetVars)) + - scale_fill_grey() + + #scale_fill_grey() + theme(legend.title = element_blank()) + theme(legend.position = "right") + labs(caption = myCaption, @@ -101,30 +105,27 @@ ba_createGreyStackedPlot <- function(dt,fillVar,yVar,yLabel,facetVars){ ) } -ba_plotChange <- function(dt,dateBreaks,yLabel,facetVars){ - myPlot <- ggplot(dt[as.POSIXlt(r_startHalfHour)$hour >= 6], - aes(y = deltaMain, x = r_startHalfHour, colour = actLabel, - linetype = actLabel)) + - geom_line() + - scale_x_datetime(date_labels = "%H:%M", date_breaks = eval(dateBreaks)) + +ba_createGreyStackedPlot <- function(dt,fillVar,yVar,yLabel,facetVars){ + # intended for testing overall distributions + ggplot(dt, aes(y = get(yVar), x = r_hmsTime, fill = get(fillVar))) + # forcats::fct_rev() to reverse categories + geom_area(position = "stack") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - #scale_colour_grey() + + theme(strip.text.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5)) + facet_grid(eval(facetVars)) + + scale_fill_grey() + theme(legend.title = element_blank()) + theme(legend.position = "right") + labs(caption = myCaption, x = "Hour of the day", y = eval(yLabel) ) - return(myPlot) } ba_createDetailedPlot <- function(dt){ # requires a dt of just the single actLabel you want to plot - ggplot(dt, aes(x = r_startHalfHour, y = pcAnyHHMainWt, colour = as.factor(ba_survey))) + + ggplot(dt, aes(x = r_hmsTime, y = pcAnyHHMainWt, colour = as.factor(ba_survey))) + geom_line() + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + #scale_colour_grey() + facet_grid(r_wday ~ inHome) + theme(legend.title = element_blank()) + @@ -137,10 +138,9 @@ ba_createDetailedPlot <- function(dt){ ba_createDetailedAgePlot <- function(dt){ # requires a dt of just the single actLabel you want to plot - ggplot(dt, aes(x = r_startHalfHour, y = pcAnyHHMainWt, colour = as.factor(ba_survey))) + + ggplot(dt, aes(x = r_hmsTime, y = pcAnyHHMainWt, colour = as.factor(ba_survey))) + geom_line() + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + #scale_colour_grey() + facet_grid(r_wday ~ ba_workingAge) + theme(legend.title = element_blank()) + @@ -153,10 +153,9 @@ ba_createDetailedAgePlot <- function(dt){ ba_createDetailedChangePlot <- function(dt){ # requires a dt of just the single actLabel you want to plot - ggplot(dt, aes(x = r_startHalfHour, y = deltaMain, linetype = inHome, colour = r_wday)) + + ggplot(dt, aes(x = r_hmsTime, y = deltaMain, linetype = inHome, colour = r_wday)) + geom_line() + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + #scale_colour_grey() + theme(legend.title = element_blank()) + theme(legend.position = "bottom") + @@ -169,10 +168,9 @@ ba_createDetailedChangePlot <- function(dt){ ba_createDetailedChangeAgePlot <- function(dt){ # requires a dt of just the single actLabel you want to plot - ggplot(dt, aes(x = r_startHalfHour, y = deltaMain, linetype = inHome, colour = r_wday)) + + ggplot(dt, aes(x = r_hmsTime, y = deltaMain, linetype = inHome, colour = r_wday)) + geom_line() + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + #scale_colour_grey() + facet_grid(ba_workingAge ~ .) + theme(legend.title = element_blank()) + @@ -245,12 +243,13 @@ ba_DEMANDreLabelSvyAggActs <- function(dt){ reqLibsLocal <- c("data.table", # fast data munching "ggplot2", # for fancy graphs "readr", # fast csv reading/writing - "lubridate", # easy dates/times - "car", # post-regression stuff + "lubridate", # easy dates/times + "car", # post-regression stuff "sandwich", #for robust se "stargazer", # for pretty regression results "survey", # for weighted tables "doBy", # groupwise stats + "hms", # hms "robustbase", # robust glm "knitr" # for kable ) @@ -268,7 +267,11 @@ demandCopyright <- paste0(projLoc,"/demandCopyright.Rmd") # Parameters ---- # local MTUS (UK sample, processed 1974/5 - 2014/15) -mtusW6Path <- "~/Data/MTUS/World_6/" +if(userName == "ben" | userName == "ba1e12"){ + # assume laptop or RStudio Server + mtusW6Path <- "~/Data/MTUS/World_6/" +} + mtusW6aUKEpsF <- paste0(mtusW6Path, "processed/mtusW6aEpsUnifiedDT.csv.gz") mtusW6aUKSurveyF <- paste0(mtusW6Path, "processed/mtusW6aSurveyUnifiedDT.csv.gz") syntheticDiaryFile <- paste0(mtusW6Path, "processed/synthMTUSW6aEpsDT.csv.gz") @@ -282,9 +285,16 @@ syntheticDiaryFile <- paste0(mtusW6Path, "processed/synthMTUSW6aEpsDT.csv.gz") ngDPath <- "~/Data/UK_National_Grid" ``` -# To do +# About + +Version of code to match version 2.0 of paper submitted to Energy Policy: + + * removed all code & results not used in paper + * removed code will be foind in v1.9 of Rmd + +# To Do - * re-introduce regression models for early vs late evening food + * # Introduction @@ -319,32 +329,40 @@ ng2017DT <- ng2017DT[, r_date := dmy(SETTLEMENT_DATE)] # requires lubridate # rbind them to give full dataset ngDemandDT <- rbind(ng2005_2010DT,ng2011_2016DT,ng2017DT, fill = TRUE) # fill missing col in first file +# keep what we need +ngDemandDT <- ngDemandDT[, .(SETTLEMENT_DATE,SETTLEMENT_PERIOD,ENGLAND_WALES_DEMAND, + r_date)] + +# check +#head(ngDemandDT) + # set some dates & times -ngDemandDT <- ngDemandDT[, r_month := month(r_date, label = TRUE, abbr = TRUE)] # requires lubridate -ngDemandDT <- ngDemandDT[, r_year := year(r_date)] # requires lubridate -ngDemandDT <- ngDemandDT[, r_dow := wday(r_date, label = TRUE, abbr = TRUE)] # requires lubridate +ngDemandDT <- ngDemandDT[, r_month := lubridate::month(r_date, label = TRUE, abbr = TRUE)] # requires lubridate +ngDemandDT <- ngDemandDT[, r_year := lubridate::year(r_date)] # requires lubridate +ngDemandDT <- ngDemandDT[, r_dow := lubridate::wday(r_date, label = TRUE, abbr = FALSE)] # requires lubridate # table(ngDemandDT$r_year, ngDemandDT$r_month) # create a half-hour date-time ngDemandDT <- ngDemandDT[, r_fractionalHour := (SETTLEMENT_PERIOD/2)-0.5] -ngDemandDT <- ngDemandDT[, r_hour := floor(r_fractionalHour/1)] -ngDemandDT <- ngDemandDT[, r_mins := ifelse((r_fractionalHour*10)%%10==0, 0, 30)] -# this is broken -# ngDemandDT <- ngDemandDT[, r_halfHour := paste0(r_hour, ":", r_mins)] -# ngDemandDT <- ngDemandDT[, r_dateTime := strptime(paste0(r_date, " ", r_hour, ":", r_mins), "%y-%m-%d %H:%M" )] +ngDemandDT <- ngDemandDT[, r_hour := floor(r_fractionalHour/1)] # this will create some hour = 24 where we have clock changes +ngDemandDT <- ngDemandDT[r_hour == 24, r_hour := 0] # fix it +ngDemandDT <- ngDemandDT[, r_mins := ifelse((r_fractionalHour*10)%%10==0, 00, 30)] +ngDemandDT <- ngDemandDT[, halfHourChar := paste0(r_hour, ":", r_mins)] # <- create hh:mm time as char + +ngDemandDT <- makeHmsTime(ngDemandDT) # convert time to hh:mm # run some data checks at monthly level dataNote <- "Source: National Grid half-hourly demand data (England & Wales) 2005-2017" -tempDT <- ngDemandDT[, .(meanMW = mean(ENGLAND_WALES_DEMAND), +monthlyDT <- ngDemandDT[, .(meanMW = mean(ENGLAND_WALES_DEMAND), nObs = .N), keyby = .(r_year, r_month)] -ggplot(tempDT, aes(x = r_year, y = r_month, fill = nObs)) + +ggplot(monthlyDT, aes(x = r_year, y = r_month, fill = nObs)) + geom_tile() + labs(caption = paste0("Data check: number of monthly half hour observatons\n", dataNote), x = "Year", y = "Month") -ggplot(tempDT, aes(x = r_year, y = r_month, fill = meanMW)) + +ggplot(monthlyDT, aes(x = r_year, y = r_month, fill = meanMW)) + geom_tile() + scale_fill_continuous(low = "green", high = "red") + labs(caption = paste0("Monthly mean MW\n", dataNote), @@ -352,7 +370,7 @@ ggplot(tempDT, aes(x = r_year, y = r_month, fill = meanMW)) + y = "Month") -ggplot(tempDT, aes(x = r_month, y = meanMW, colour = r_year, group = r_year)) + # need to fix the fractional years on the legend +ggplot(monthlyDT, aes(x = r_month, y = meanMW, colour = r_year, group = r_year)) + # need to fix the fractional years on the legend geom_line() + ylim(0,NA) + labs(caption = paste0("Monthly mean MW\n", dataNote), @@ -365,44 +383,107 @@ ggplot(tempDT, aes(x = r_month, y = meanMW, colour = r_year, group = r_year)) + The last chart shows a quite substantial drop in demand over the last 10 years... at all times of year. -The next two charts show demand levels in January 2006 & 2016 as mean MW per hour. +```{r ng demand over time} +extractDT <- ngDemandDT[(r_year == 2006 | r_year == 2011 | r_year == 2016) & + (r_month == "Jan" | r_month == "Jul"), + .(r_hmsTime, halfHourChar, r_date, r_dow, r_month, r_year, ENGLAND_WALES_DEMAND)] # <- keep these only -```{r compareNGJanuary2006toJanuary2016} -dataNote <- "Source: National Grid half-hourly demand data (England & Wales) 2006-2016" +extractDT <- extractDT[, wDay := ifelse(r_dow != "Saturday" & r_dow != "Sunday", + "Weekday", + as.character(r_dow))] # <- create weekday as char for ease of labelling graphs + + +halfHourlyNgPlotDT <- extractDT[, .( meanGW = mean(ENGLAND_WALES_DEMAND)/1000), # <- use mean GW as across 5 weekdays + keyby = .(r_hmsTime, halfHourChar, wDay, r_month, r_year)] # <- all seasons + +``` -extractDT <- ngDemandDT[r_year == 2006 | r_year == 2011 | r_year == 2016] +The next two charts show demand levels in January 2006 & 2016 as mean MW per hour. Figure \ref(fig:compareTotal) shows mean MW across weekdays compared with each weekend day. Fig 1 in paper. + +```{r compareTotal, fig.cap="NG demand totals over time", fig.height=6} +addPeakPeriod <- function(plot){ + newPlot <- plot + + annotate("rect", xmin = hms::parse_hms("16:00:00"), + xmax = hms::parse_hms("20:00:00"), + ymin = yMin, ymax = yMax*1.01, alpha = rectAlpha, fill = vLineCol, colour = vLineCol) + + scale_x_time(breaks = c(hms::parse_hms("03:00:00"), + hms::parse_hms("06:00:00"), + hms::parse_hms("09:00:00"), + hms::parse_hms("12:00:00"), + hms::parse_hms("15:00:00"), + hms::parse_hms("18:00:00"), + hms::parse_hms("21:00:00") + ) #, date_labels = "%H:%M" + ) + + labs(caption = paste0(dataNote, "\nPeak demand period shown shaded")) + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + return(newPlot) +} + +dataNote <- "Source: National Grid half-hourly demand data (England & Wales) 2006-2016" -tempDT <- extractDT[r_dow != "Sat" & r_dow != "Sun", .( meanMW = mean(ENGLAND_WALES_DEMAND)), keyby = .(r_hour, r_year)] +yMax <- max(halfHourlyNgPlotDT$meanGW) +yMin <- min(halfHourlyNgPlotDT$meanGW) +rectAlpha <- 0.1 +vLineAlpha <- 0.4 +vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette +myTextSize <- 3 -rawPlot <- ggplot(tempDT, aes(x = r_hour, y = meanMW, colour = as.factor(r_year), group = r_year)) + # need to fix the fractional years on the legend +rawPlot <- ggplot(halfHourlyNgPlotDT, aes(x = r_hmsTime, y = meanGW, colour = as.factor(r_year), group = r_year)) + # need to fix the fractional years on the legend geom_line() + + facet_grid(r_month ~ wDay) + #ylim(0,NA) + - labs(caption = paste0("Hourly mean MW (weekdays)\n", dataNote), - x = "Hour", - y = "MW") + + labs(caption = dataNote, + x = "Time of Day", + y = "Mean GW") + theme(legend.title=element_blank()) #http://www.cookbook-r.com/Graphs/Legends_%28ggplot2%29/#hiding-the-legend-title -rawPlot + scale_colour_grey(start = 0.8, end = 0.2) +rawPlot + +myPlot <- addPeakPeriod(rawPlot) + scale_colour_grey(start = 0.8, end = 0.2) +myPlot + + labs(caption = paste0(dataNote, "\nPeak demand period shown shaded")) + + theme(legend.position = "bottom") + +ggsave("ngMeanGW.png") +``` + +Figure \ref(fig:compareNormalised) shows the same data but normalised to the overall mean. This shows the relative distribution of demand rather than absolute and illustrates the increased 'peakiness'. Fig 2 in paper. + +```{r compareNormalised, fig.cap="NG Demand normalised", fig.height=6} +# normalise by the mean for each year, month -# normalise by the mean for each year -mean2006 <- mean(tempDT[r_year == 2006]$meanMW) -mean2011 <- mean(tempDT[r_year == 2011]$meanMW) -mean2016 <- mean(tempDT[r_year == 2016]$meanMW) +halfHourlyNgPlotDT <- halfHourlyNgPlotDT[, normMean := meanGW/mean(meanGW), by = .(r_year, r_month)] +# halfHourlyNgPlotDT <- halfHourlyNgPlotDT[r_year == 2011, normMean := meanGW/mean2011] +# halfHourlyNgPlotDT <- halfHourlyNgPlotDT[r_year == 2016, normMean := meanGW/mean2016] -tempDT <- tempDT[r_year == 2006, normMean := meanMW/mean2006] -tempDT <- tempDT[r_year == 2011, normMean := meanMW/mean2011] -tempDT <- tempDT[r_year == 2016, normMean := meanMW/mean2016] +halfHourlyNgPlotDT <- halfHourlyNgPlotDT[, truncHH := hms::trunc_hms(r_hmsTime, 30*60)] # truncates to half hours -normPlot <- ggplot(tempDT, aes(x = r_hour, y = normMean, colour = as.factor(r_year), group = r_year)) + # need to fix the fractional years on the legend +normPlot <- ggplot(halfHourlyNgPlotDT, + aes(x = truncHH, y = normMean, + colour = as.factor(r_year), + group = r_year)) + # need to fix the fractional years on the legend geom_line() + + facet_grid(r_month ~ wDay) + #ylim(0,NA) + - labs(caption = paste0("Normalised hourly mean MW (weekdays)\n", dataNote), - x = "Hour", - y = "MW") + + labs(caption = dataNote, + x = "Time of Day", + y = "Normalised mean GW") + theme(legend.title=element_blank()) #http://www.cookbook-r.com/Graphs/Legends_%28ggplot2%29/#hiding-the-legend-title -normPlot + scale_colour_grey(start = 0.8, end = 0.2) +yMax <- max(halfHourlyNgPlotDT$normMean) +yMin <- min(halfHourlyNgPlotDT$normMean) +rectAlpha <- 0.1 +vLineAlpha <- 0.4 +vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette +myTextSize <- 3 + +myPlot <- addPeakPeriod(normPlot) + scale_colour_grey(start = 0.8, end = 0.2) +myPlot + + labs(caption = paste0(dataNote, "\nPeak demand period shown shaded")) + + theme(legend.position = "bottom") +ggsave("ngNormalisedMeanGW.png") ``` @@ -431,7 +512,7 @@ mtusW6aUKSurveyOrigDT <- as.data.table(read_csv(mtusW6aUKSurveyF)) # process or add variables here before making survey version ---- -# check +# check t <- with(mtusW6aUKSurveyOrigDT, table(ba_age_r,ba_survey,useNA = "always")) t @@ -440,7 +521,7 @@ t mtusW6aUKSurveyOrigDT <- mtusW6aUKSurveyOrigDT[!is.na(ba_age_r), ba_workingAge := ifelse(ba_age_r == "(0,16]" | ba_age_r == "66-75" | ba_age_r == "75+", "65+", "16-64")] -# check +# check t <- with(mtusW6aUKSurveyOrigDT, table(ba_age_r,ba_workingAge,useNA = "always")) @@ -470,7 +551,7 @@ t <- svytable(~ba_survey + ba_age_r, svyMtusW6aUKSurveyDT, na.action=na.pass ) -kable(caption="MTUS 1974-2015 Survay data: age ranges (weighted)", +kable(caption="MTUS 1974-2015 Survey data: age ranges (weighted)", t ) ``` @@ -504,7 +585,7 @@ We can also see that the time-use surveys for 1995 and 2005 have substantially l synthW6aEpsDT <- synthW6aEpsOrigDT # in case of accidents - don't need to re-run the data load mtusW6aUKSurveyDT <- mtusW6aUKSurveyOrigDT -# keep good survey years - episode data ---- +# keep good survey years - episode data ---- t <- synthW6aEpsDT[, .(nEpisodes = .N, nDiaries = uniqueN(ba_diarypid), nRespondents = uniqueN(ba_pid)), keyby = ba_survey ] @@ -519,7 +600,7 @@ t <- synthW6aEpsDT[, .(nEpisodes = .N, kable(caption = "Number of episodes, diaries & respondents by year (synthetic episode data, after bad years removed)", t) -# keep good survey years - survey data ---- +# keep good survey years - survey data ---- t <- mtusW6aUKSurveyDT[, .(nDiaries = uniqueN(ba_diarypid), nRespondents = uniqueN(ba_pid)), keyby = ba_survey ] @@ -565,8 +646,7 @@ synthW6aEpsDT <- dt # this will definitely now only have episodes that are from dt <- NULL # remove to save memory ``` - -# DEMAND Acts +# Code DEMAND Acts We use this data to code the main and secondary activities into a non-arbitrary but highly aggregated set of 10 DEMAND 'Acts'. This enables us to more easily depict change over time at a coarse level. The aggregated codes are as follows: @@ -670,13 +750,8 @@ Note that this scheme is entirely open for revision. Feedback welcome. synthW6aEpsDT <- ba_DEMANDcodeActs(synthW6aEpsDT) # construct a fake half hour in r time ---- -# this fixes the problem of no episode start times where the episodes are imputed into half hour 'slots' as the episode spans more than 1 half hour -# this will have date set to 'today' - i.e. the date on which it is run -synthW6aEpsDT <- synthW6aEpsDT[, r_startHalfHour := as.POSIXct(paste0(r_hour.spine, ":", r_min.spine), - format = "%H:%M" - ) - ] - +# keep this as a char until we need to draw graphs +synthW6aEpsDT <- synthW6aEpsDT[, halfHourChar := paste0(r_hour.spine, ":", r_min.spine)] # create a month -> season synthW6aEpsDT <- synthW6aEpsDT[, month := as.POSIXlt(r_epStartDate)$mon] @@ -749,7 +824,7 @@ kable(caption = "Check coding of weekdays & week-ends", t) hhDemandActsDT <- synthW6aEpsDT[, .(nHHmain = .N ), - keyby = .(ba_survey, ba_pid, ba_diarypid, r_wday, r_startHalfHour, inHome, da_main) + keyby = .(ba_survey, ba_pid, ba_diarypid, r_wday, halfHourChar, inHome, da_main) ] hhDemandActsDT <- hhDemandActsDT[, anyHHmain := ifelse(nHHmain > 0, @@ -762,10 +837,10 @@ hhDemandActsDT <- hhDemandActsDT[, anyHHmain := ifelse(nHHmain > 0, hhDemandActsDT <- ba_DEMANDreLabelActs(hhDemandActsDT) -# add survey data (including weights) +# XXX Add survey data (including weights) ---- # ,ba_sex,ba_nchild,ba_npeople,empstat # only add the vars you want to use as it seems to slow down the svydesign function (why?) -tempDT <- mtusW6aUKSurveyDT[, .(ba_survey, ba_pid, ba_diarypid,ba_workingAge,indWeight,diaryWeight)] +tempDT <- mtusW6aUKSurveyDT[, .(ba_survey, ba_pid, ba_diarypid,ba_workingAge,ba_sex,empstat,indWeight,diaryWeight)] setkey(tempDT, ba_survey, ba_diarypid) setkey(hhDemandActsDT, ba_survey, ba_diarypid) hhDemandActsDT <- tempDT[hhDemandActsDT] @@ -795,6 +870,24 @@ t <- testDT[, .(nCases = uniqueN(ba_pid)), by = ba_survey] kable(caption = "Lost cases by survey (due to diaryWeight == 0 or NA", t) ``` +## Check secondary acts + +```{r table main acts} +t <- with(synthW6aEpsDT, table(da_main, ba_survey)) + +kable(caption="MTUS 1974-2015 Survey data: Main 'DEMAND Act' (weighted, col %)", + round(100*prop.table(t,2),2) +) +``` + +```{r table secondary acts} +t <- table(synthW6aEpsDT$da_sec, synthW6aEpsDT$ba_survey) + +kable(caption="MTUS 1974-2015 Survey data: Secondary 'DEMAND Act' (weighted, col %)", + round(100*prop.table(t,2),2) +) +``` + # Analysis @@ -804,11 +897,9 @@ kable(caption = "Lost cases by survey (due to diaryWeight == 0 or NA", t) * Cooking * Occupancy – number of persons or just ‘any person’? -## DEMAND activities over time - Analysis below uses unweightd data except where explicitly stated. -### Test weights +## Test weights Some quick tests on weighted vs un-weighted data. @@ -836,23 +927,23 @@ The following chart extends this analysis to time of day by showing the weighted # aggregate to wday only # unweighted counts aggHhDemandActsDT <- hhDemandActsDT[, .(anyHHmain = sum(anyHHmain)), - by = .(ba_survey, r_wday, r_startHalfHour, actLabel)] + by = .(ba_survey, r_wday, halfHourChar, actLabel)] aggHhDemandActsDT <- aggHhDemandActsDT[, actLabelChar := as.character(actLabel)] # Create table of n half hours reported in total # needed for the aggregation tables to create % hh Act was reported aggAllHHDT <- hhDemandActsDT[, .(nHHs = sum(anyHHmain)), - keyby = .(ba_survey, r_wday, r_startHalfHour) + keyby = .(ba_survey, r_wday, halfHourChar) ] -setkey(aggHhDemandActsDT, ba_survey, r_wday, r_startHalfHour) -aggHhDemandActsDT <- merge(aggHhDemandActsDT, aggAllHHDT) +setkey(aggHhDemandActsDT, ba_survey, r_wday, halfHourChar) +aggHhDemandActsDT <- merge(aggHhDemandActsDT, aggAllHHDT) # <- this requires half hours to be char or integers not hms which is why we keep them as char until plotting aggHhDemandActsDT <- aggHhDemandActsDT[, pcAnyHHMain := 100*(anyHHmain/nHHs)] aggHhDemandActsDT <- ba_DEMANDreLabelSvyAggActs(aggHhDemandActsDT) # as.data.table creates char from svy results # weighted counts -wtActs <- svytable(~ba_survey + r_wday + r_startHalfHour + actLabel, svyhhDemandActsDT, +wtActs <- svytable(~ba_survey + r_wday + halfHourChar + actLabel, svyhhDemandActsDT, round = TRUE, exclude = NULL, na.action=na.pass @@ -861,40 +952,38 @@ wtActs <- as.data.table(wtActs) wtActs <- wtActs[, anyHHmainWt := N] # count all half hours -wtAllHHs <- svytable(~ba_survey + r_wday + r_startHalfHour, svyhhDemandActsDT, +wtAllHHs <- svytable(~ba_survey + r_wday + halfHourChar, svyhhDemandActsDT, round = TRUE, exclude = NULL, na.action=na.pass ) wtAllHHs <- as.data.table(wtAllHHs) wtAllHHs <- wtAllHHs[, nHHsWt := N] -setkey(wtActs, ba_survey, r_wday, r_startHalfHour) -setkey(wtAllHHs, ba_survey, r_wday, r_startHalfHour) +setkey(wtActs, ba_survey, r_wday, halfHourChar) +setkey(wtAllHHs, ba_survey, r_wday, halfHourChar) wt <- wtActs[wtAllHHs] wt <- wt[, actLabelChar := as.character(actLabel)] wt <- wt[, pcAnyHHMainWt := 100*(anyHHmainWt/nHHsWt)] -wt <- wt[, ba_survey := as.integer(ba_survey)] # as.data.table creates char from svy results +aggHhDemandActsWtDT <- wt[, ba_survey := as.integer(ba_survey)] # as.data.table creates char from svy results # fix actLabel factor ordering - -aggHhDemandActsWtDT <- wt[, r_startHalfHour := as.POSIXct(r_startHalfHour)] aggHhDemandActsWtDT <- ba_DEMANDreLabelSvyAggActs(aggHhDemandActsWtDT) # as.data.table creates char from svy results -setkey(aggHhDemandActsDT, ba_survey, r_wday, r_startHalfHour, actLabel) -setkey(aggHhDemandActsWtDT, ba_survey, r_wday, r_startHalfHour, actLabel) +setkey(aggHhDemandActsDT, ba_survey, r_wday, halfHourChar, actLabel) +setkey(aggHhDemandActsWtDT, ba_survey, r_wday, halfHourChar, actLabel) dt <- merge(aggHhDemandActsDT,aggHhDemandActsWtDT) +dt <- makeHmsTime(dt) # create hms half hour for plotting etc dt <- dt[, diff := pcAnyHHMain - pcAnyHHMainWt] myCaption <- "DEMAND Synthetic MTUS: 1974 - 2014" -ggplot(dt, aes(x = r_startHalfHour, y = diff, colour = actLabel)) + +ggplot(dt, aes(x = r_hmsTime, y = diff, colour = actLabel)) + geom_point() + theme(strip.text.y = element_text(size = 8, colour = "Black", angle = 0)) + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - facet_grid(ba_survey ~ r_wday) + + facet_grid(ba_survey ~ r_wday, scales = "free") + labs(caption = myCaption, x = "Time of day", y = "% point difference (un-weighted - weighted)" @@ -906,73 +995,28 @@ Again, these results show minor variations with the maximum difference being +/- All subsequent analysis uses weighted data unless specifically stated (e.g. for testing purposes) -### DEMAND: All age groups combined - -#### Whole day +## DEMAND: All age groups combined The following charts use the 'any in a half hour' data to show the percentage of half hours reported as a given act at a given time of day. The percentage charts are useful for assessing the changing composition of in-home vs out-of-home activities but can over-emphasise the apparent prevalence of certain acts when it is less common to be out of the home - such as out of home sleep during 01:00 - 05:00. -As there is the potential for more than one activity to be reported in a given half hour the activities may sum to greater than 100% and this is especially visible for diaries with smaller duration slots and more additional variables which lead to more frequent episode boundaries (e.g. 2000 & 2014/15). - -First we present results just for 2014 to give a snapshot of the most recent data. - -```{r 2014 as line chart} -# re-use aggHhDemandActsWtDT -aggHhDemandActsWtDT <- ba_DEMANDreLabelSvyAggActs(aggHhDemandActsWtDT) # as.data.table creates char from svy results - -dt <- aggHhDemandActsWtDT[ba_survey=="2014"] - -# requires a dt of just the single actLabel you want to plot -myCaption <- "Synthetic MTUS 2014, UK sample (%, weighted)" - - ggplot(dt, aes(x = r_startHalfHour, y = pcAnyHHMainWt, colour = actLabel)) + - geom_line() + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - #scale_colour_grey() + - facet_grid(. ~ r_wday) + - theme(legend.title = element_blank()) + - theme(legend.position = "bottom") + - labs(caption = myCaption, - x = "Hour of the day", - y = "% of half hours" - ) -``` - -```{r plot DEMAND acts (stacked)} - -myCaption <- "Synthetic MTUS 1974, UK sample (%, weighted)" -ba_createStackedPlot(aggHhDemandActsWtDT[ba_survey == "1974"], yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ r_wday") - -ba_createGreyStackedPlot(aggHhDemandActsWtDT[ba_survey == "1974"], yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ r_wday") - - -myCaption <- "Synthetic MTUS 1985, UK sample (%, weighted)" -ba_createStackedPlot(aggHhDemandActsWtDT[ba_survey == "1985"], yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ r_wday") - -myCaption <- "Synthetic MTUS 2000, UK sample (%, weighted)" -ba_createStackedPlot(aggHhDemandActsWtDT[ba_survey == "2000"], yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ r_wday") +Fig \ref(fig:allYearsStackedByWeekday) shows all DEMAND activities stacked by day of the week. (no longer used in paper) -myCaption <- "Synthetic MTUS 2014, UK sample (%, weighted)" -ba_createStackedPlot(aggHhDemandActsWtDT[ba_survey == "2014"], yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ r_wday") -``` - -Finally we plot all years combined. +```{r allYearsStackedByWeekday, fig.cap="Synthetic MTUS 1974 - 2014, UK sample (% of halfhours on given day reporting X as main activity, weighted)"} -```{r all years stacked} +aggHhDemandActsWtDT <- makeHmsTime(aggHhDemandActsWtDT) # add hms time myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (%, weighted)" ba_createStackedPlot(aggHhDemandActsWtDT, yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ r_wday") ``` -Compare 1974 & 2014 but without looking at day of the week & in-home only (paper v2.0, Fig 2, revised). Adjust height of chart to fit single A4 page? +Fig \ref(fig:allYearsStackedByWeekday) shows all DEMAND activities stacked by day of the week uses line chart to show all acts in vs out of home. Fig 3 in paper. -```{r repeat all acts but split by at home vs not at home, fig.height=10, fig.width=8} +```{r allYearsLineByLocation, fig.height=10, fig.width=8} # this requires creating a new aggregated table # weighted counts -wtActs <- svytable(~ba_survey + r_startHalfHour + inHome + actLabel, svyhhDemandActsDT, +wtActs <- svytable(~ba_survey + halfHourChar + inHome + actLabel, svyhhDemandActsDT, round = TRUE, exclude = NULL, na.action=na.pass @@ -981,15 +1025,15 @@ wtActs <- as.data.table(wtActs) wtActs <- wtActs[, anyHHmainWt := N] # count all half hours -wtAllHHs <- svytable(~ba_survey + r_startHalfHour + inHome, svyhhDemandActsDT, +wtAllHHs <- svytable(~ba_survey + halfHourChar + inHome, svyhhDemandActsDT, round = TRUE, exclude = NULL, na.action=na.pass ) wtAllHHs <- as.data.table(wtAllHHs) wtAllHHs <- wtAllHHs[, nHHsWt := N] -setkey(wtActs, ba_survey, r_startHalfHour, inHome) -setkey(wtAllHHs, ba_survey, r_startHalfHour, inHome) +setkey(wtActs, ba_survey, halfHourChar, inHome) +setkey(wtAllHHs, ba_survey, halfHourChar, inHome) wt <- wtActs[wtAllHHs] wt <- wt[, actLabelChar := as.character(actLabel)] wt <- wt[, pcAnyHHMainWt := 100*(anyHHmainWt/nHHsWt)] @@ -997,53 +1041,36 @@ wt <- wt[, pcAnyHHMainWt := 100*(anyHHmainWt/nHHsWt)] wt <- wt[, ba_survey := as.integer(ba_survey)] # as.data.table creates char from svy results # fix actLabel factor ordering -wt <- wt[, r_startHalfHour := as.POSIXct(r_startHalfHour)] +wt <- makeHmsTime(wt) # add hms time plotDT <- ba_DEMANDreLabelSvyAggActs(wt) # as.data.table creates char from svy results # for paper myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (%, weighted)" -dt <- plotDT[inHome == "At own home"] -ba_createStackedPlot(dt, yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ inHome") + - scale_fill_distiller(palette = "Spectral") -``` - -Now plot % point change ignoring day of the week (paper Fig 2, adjust height & width to suit single A4) - -```{r pc point change all days, fig.height=10, fig.width=8} -dt1974WtDT <- plotDT[ba_survey == "1974"] -dt1974WtDT <- dt1974WtDT[, pcAnyHHMainWt.1974 := pcAnyHHMainWt] -dt2014WtDT <- plotDT[ba_survey == "2014"] -dt2014WtDT <- dt2014WtDT[, pcAnyHHMainWt.2014 := pcAnyHHMainWt] -setkey(dt1974WtDT, r_startHalfHour, inHome, actLabel) -setkey(dt2014WtDT, r_startHalfHour, inHome, actLabel) - -dtChange1974_2014WtDT <- dt1974WtDT[dt2014WtDT] -dtChange1974_2014WtDT <- dtChange1974_2014WtDT[, deltaMain := pcAnyHHMainWt.2014 - pcAnyHHMainWt.1974] - -myPlot <- ba_plotChange(dtChange1974_2014WtDT, dateBreaks = "2 hours", - yLabel = "% point change 1974 - 2014", - facetVars = "inHome ~ .") # plot as normal ggplot (easier for paper writing) +#dt <- plotDT[inHome == "At own home"] +yMax <- max(plotDT$pcAnyHHMainWt) +myAlpha <- 0.1 +vLineAlpha <- 0.4 +vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette +myTextSize <- 3 -myPlot +# ba_createStackedPlot(plotDT, yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ inHome") + +# scale_fill_hue() -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (%, 16:00 - 21:00, weighted)" -myPlot <- ba_plotChange(dtChange1974_2014WtDT[as.POSIXlt(r_startHalfHour)$hour >= 16 & - as.POSIXlt(r_startHalfHour)$hour <= 21], dateBreaks = "1 hours", - yLabel = "% point change 1974 - 2014", - facetVars = "inHome ~ .") # plot as normal ggplot (easier for paper writing) +ba_createStepPlot(plotDT, yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ inHome") + + scale_fill_hue() -myPlot +ggsave("allYearsLinePlot.png", height = 8) ``` -Next we separate out 'in home' vs 'not in home' activities (as reported) - Fig 3. +Separate out weekdays from weekend days and select specific variables to plot... -```{r repeat all acts but split by day of week and at home vs not at home} +```{r selectedActsDeltaPrep} # this requires creating a new aggregated table # weighted counts -wtActs <- svytable(~ba_survey + r_wday + r_startHalfHour + inHome + actLabel, svyhhDemandActsDT, +wtActs <- svytable(~ba_survey + r_wday + halfHourChar + inHome + actLabel, svyhhDemandActsDT, round = TRUE, exclude = NULL, na.action=na.pass @@ -1052,15 +1079,15 @@ wtActs <- as.data.table(wtActs) wtActs <- wtActs[, anyHHmainWt := N] # count all half hours -wtAllHHs <- svytable(~ba_survey + r_wday + r_startHalfHour + inHome, svyhhDemandActsDT, +wtAllHHs <- svytable(~ba_survey + r_wday + halfHourChar + inHome, svyhhDemandActsDT, round = TRUE, exclude = NULL, na.action=na.pass ) wtAllHHs <- as.data.table(wtAllHHs) wtAllHHs <- wtAllHHs[, nHHsWt := N] -setkey(wtActs, ba_survey, r_wday, r_startHalfHour, inHome) -setkey(wtAllHHs, ba_survey, r_wday, r_startHalfHour, inHome) +setkey(wtActs, ba_survey, r_wday, halfHourChar, inHome) +setkey(wtAllHHs, ba_survey, r_wday, halfHourChar, inHome) wt <- wtActs[wtAllHHs] wt <- wt[, actLabelChar := as.character(actLabel)] wt <- wt[, pcAnyHHMainWt := 100*(anyHHmainWt/nHHsWt)] @@ -1068,28 +1095,10 @@ wt <- wt[, pcAnyHHMainWt := 100*(anyHHmainWt/nHHsWt)] wt <- wt[, ba_survey := as.integer(ba_survey)] # as.data.table creates char from svy results # fix actLabel factor ordering -wt <- wt[, r_startHalfHour := as.POSIXct(r_startHalfHour)] +wt <- makeHmsTime(wt) # add hms time aggHhDemandActsInHomeWtDT <- ba_DEMANDreLabelSvyAggActs(wt) # as.data.table creates char from svy results -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (%, weighted, not at own home)" -ba_createStackedPlot(aggHhDemandActsInHomeWtDT[inHome != "At own home"], yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ r_wday") - -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (%, weighted, at own home)" -ba_createStackedPlot(aggHhDemandActsInHomeWtDT[inHome == "At own home"], yVar = "pcAnyHHMainWt", fillVar = "actLabel", yLabel = "% halfhours (main act)", facetVars = "ba_survey ~ r_wday") - -``` - -> 2nd acts not useful - -As the above representation is difficult to interpret, we next present a derived graph which shows the change in % of halfhours in which these activities were recorded from 1974 to 2014. This chart therefore shows aboslute change of percentage points and highlights those relatively frequent activities which have changed a lot. - -Fig 3 - -> NB: ignores hours < 06:00 to avoid noisy data - - -```{r all - plot pc change 1974_2014, fig.height=10, fig.width=8} # re-use dt from above @@ -1098,41 +1107,77 @@ dt1974WtDT <- aggHhDemandActsInHomeWtDT[ba_survey == "1974"] dt1974WtDT <- dt1974WtDT[, pcAnyHHMainWt.1974 := pcAnyHHMainWt] dt2014WtDT <- aggHhDemandActsInHomeWtDT[ba_survey == "2014"] dt2014WtDT <- dt2014WtDT[, pcAnyHHMainWt.2014 := pcAnyHHMainWt] -setkey(dt1974WtDT, r_wday, r_startHalfHour, inHome, actLabel) -setkey(dt2014WtDT, r_wday, r_startHalfHour, inHome, actLabel) +setkey(dt1974WtDT, r_wday, r_hmsTime, inHome, actLabel) +setkey(dt2014WtDT, r_wday, r_hmsTime, inHome, actLabel) dtChange1974_2014WtDT <- dt1974WtDT[dt2014WtDT] dtChange1974_2014WtDT <- dtChange1974_2014WtDT[, deltaMain := pcAnyHHMainWt.2014 - pcAnyHHMainWt.1974] -myPlot <- ba_plotChange(dtChange1974_2014WtDT, dateBreaks = "2 hours", - yLabel = "% point change 1974 - 2014", - facetVars = "r_wday ~ inHome") # plot as normal ggplot (easier for paper writing) - -myPlot +# select the variables we want to plot +changePlotDT <- dtChange1974_2014WtDT[actLabel %like% "Personal" | + actLabel %like% "Work" | + actLabel %like% "Travel" & inHome %like% "Not"| # <- avoid in-home travel! (0) + actLabel %like% "Food" | + actLabel %like% "Media", .(r_hmsTime, inHome, r_wday, actLabel, actLabelChar, pcAnyHHMainWt.1974, pcAnyHHMainWt.2014, deltaMain) + ] +# with(changePlotDT, table(inHome, actLabelChar)) +# fiddle around so we can avoid facet by inHome +changePlotDT <- changePlotDT[, actLabelChar := ifelse(actLabel %like% "Work" & + inHome %like% "Not", + "Work (out of home)", actLabelChar)] # <- to distinguish from at home work + +changePlotDT <- changePlotDT[, actLabelChar := ifelse(actLabel %like% "Work" & + inHome %like% "At", + "Work (at home)", actLabelChar)] # <- to distinguish + + +# flag the ones to plot +changePlotDT <- changePlotDT[, plotMe := ifelse(inHome %like% "At" | + actLabel %like% "Travel" | + actLabel %like% "Work", 1, 0)] +# set deltaPlot function +ba_plotChangeDow <- function(dt,actVar,yLabel){ # change plot function + myPlot <- ggplot(dt, aes(y = deltaMain, x = r_hmsTime, + colour = get(actVar), # colours may not match Fig 1 + linetype = get(actVar))) + + geom_line() + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + + #scale_colour_grey() + + facet_grid(r_wday ~ .) + + theme(legend.title = element_blank()) + + theme(legend.position = "bottom") + + labs(caption = myCaption, + x = "Hour of the day", + y = eval(yLabel) + ) + return(myPlot) +} +``` -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted, at own home)" -myPlot <- ba_plotChange(dtChange1974_2014WtDT[inHome == "At own home"], - dateBreaks = "2 hours", - yLabel = "% point change 1974 - 2014", - facetVars = "r_wday ~ .") # plot as normal ggplot (easier for paper writing) -myPlot +Fig \ref(fig:selectedActsDeltaPlot) drawa % point change plot 1974 - 2014 for selected variables. (paper v2.0 Fig 4) -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted, Not at own home)" -myPlot <- ba_plotChange(dtChange1974_2014WtDT[inHome != "At own home"], - dateBreaks = "2 hours", - yLabel = "% point change 1974 - 2014", - facetVars = "r_wday ~ .") # plot as normal ggplot (easier for paper writing) +> NB: ignores hours < 06:00 to avoid noisy data -myPlot +All activities at except Travel or where labelled. -# for paper (merge days) +```{r selectedActsDeltaPlot, fig.height=6} +myPlot <- ba_plotChangeDow(changePlotDT[plotMe == 1 & lubridate::hour(r_hmsTime) >= 6], # <- select after 05:59 + actVar = "actLabelChar", + yLabel = "% point change 1974 - 2014") # plot as normal ggplot (easier for paper writing) +yMax <- max(changePlotDT[plotMe == 1 & lubridate::hour(r_hmsTime) >= 6]$deltaMain) +yMin <- min(changePlotDT[plotMe == 1 & lubridate::hour(r_hmsTime) >= 6]$deltaMain) +rectAlpha <- 0.1 +vLineAlpha <- 0.4 +vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette +myTextSize <- 3 +addPeakPeriod(myPlot) + geom_hline(yintercept = 0) +ggsave("selectedActsDeltaPlot.png", height = 8) ``` - This charts shows: * decrease in incidence of 'work' - most probably due to a decreasing % of the population in work (ageing population) @@ -1141,108 +1186,132 @@ This charts shows: * decrease in 'social' later in evenings * increase in media use especially on weekday evenings and at weekends (NB this data excludes children...) -##### In detail - -We now produce a few more detailed plots (for edf presentation). +## Compare TU & NG demand data -```{r food related} -foodChangeDT <- dtChange1974_2014WtDT[actLabel == "Food"] -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted, food related)" -ba_createDetailedChangePlot(foodChangeDT) +```{r check TU act change against NG} +# can only do this for NG data from 2005 so use MTUS 2001 - 2014 +myCaption <- "Synthetic MTUS 2000 - 2014, UK sample (Weighted)" +dt2000WtDT <- aggHhDemandActsInHomeWtDT[ba_survey == "2000"] +dt2000WtDT <- dt2000WtDT[, pcAnyHHMainWt.2000 := pcAnyHHMainWt] -foodDT <- aggHhDemandActsInHomeWtDT[actLabel == "Food"] -ba_createDetailedPlot(foodDT) - -``` - -```{r personal or home care related} -personalChangeDT <- dtChange1974_2014WtDT[actLabel == "Personal/home"] +dt2014WtDT <- aggHhDemandActsInHomeWtDT[ba_survey == "2014"] +dt2014WtDT <- dt2014WtDT[, pcAnyHHMainWt.2014 := pcAnyHHMainWt] +setkey(dt2000WtDT, r_wday, r_hmsTime, inHome, actLabel) +setkey(dt2014WtDT, r_wday, r_hmsTime, inHome, actLabel) -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted, personal & home care related)" -ba_createDetailedChangePlot(personalChangeDT) +dtChange2000_2014WtDT <- dt2000WtDT[dt2014WtDT] -personalDT <- aggHhDemandActsInHomeWtDT[actLabel == "Personal/home"] -ba_createDetailedPlot(personalDT) +dtChange2000_2014WtDT <- dtChange2000_2014WtDT[, deltaMain := pcAnyHHMainWt.2014 - pcAnyHHMainWt.2000] -``` -```{r media related} -mediaChangeDT <- dtChange1974_2014WtDT[actLabel == "Media"] +tempDeltaDT <- dtChange2000_2014WtDT[(actLabel == "Food" | actLabel == "Media" | + actLabel %like% "Personal") & + r_wday == "Monday-Friday" & + inHome == "At own home", .(r_hmsTime, deltaMain, actLabel)] -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted, media related)" -ba_createDetailedChangePlot(mediaChangeDT) +tempDeltaDT <- tempDeltaDT[, deltaAllSum := sum(deltaMain), keyby = .(r_hmsTime)] +setkey(tempDeltaDT, r_hmsTime) -mediaDT <- aggHhDemandActsInHomeWtDT[actLabel == "Media"] -ba_createDetailedPlot(mediaDT) -``` +# r_hmsTime >= hms::as.hms("16:00:00") & +# r_hmsTime <= hms::as.hms("22:00:00") -#### Evening Peak +yMax <- max(tempDeltaDT$deltaMain) +yMin <- min(tempDeltaDT$deltaMain) -The first chart shows the (weighted) proportion of half-hours in which the DEMAND acts were reported in 2014 during this period as context. +myPlot <- ggplot2::ggplot(tempDeltaDT[r_hmsTime >= hms::as.hms("07:00:00")], aes(x = r_hmsTime)) + + #geom_point(aes(y = deltaAllSum)) + + geom_col(aes(y = deltaMain, fill = actLabel), position = "dodge") + + geom_hline(yintercept = 0) +addPeakPeriod(myPlot) -```{r create 2014 peak time DEMAND acts plot weighted} -ba_createLinePlot <- function(dt,yVar,yLabel,dateBreaks){ - ggplot(dt, aes(y = get(yVar), x = r_startHalfHour, linetype = actLabel, colour = actLabel)) + - geom_line() + - scale_x_datetime(date_labels = "%H:%M", date_breaks = dateBreaks) + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - theme(strip.text.y = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - facet_grid(ba_survey + inHome ~ r_wday) + - #scale_fill_grey() + - theme(legend.title = element_blank()) + +# NG data +ng2006DT <- halfHourlyNgPlotDT[r_year == 2006] +ng2006DT <- ng2006DT[, normMeanGW2006 := normMean] +ng2006DT <- ng2006DT[, meanGW2006 := meanGW] + +ng2016DT <- halfHourlyNgPlotDT[r_year == 2016] +ng2016DT <- ng2016DT[, normMeanGW2016 := normMean] +ng2016DT <- ng2016DT[, meanGW2016 := meanGW] + +setkey(ng2006DT, halfHourChar, wDay, r_month) +setkey(ng2016DT, halfHourChar, wDay, r_month) + +plotNGDT <- ng2006DT[ng2016DT] + +plotNGDT <- plotNGDT[, normDiffGW := normMeanGW2016 - normMeanGW2006] # -ve = drop +plotNGDT <- plotNGDT[, diffGW := meanGW2016 - meanGW2006] # -ve = drop + +ggplot(plotNGDT[r_month == "Jan" | r_month == "Jul"], aes(x = r_hmsTime, y = diffGW)) + + geom_step() + + facet_grid(wDay ~ r_month) + +ggplot(plotNGDT[r_month == "Jan" | r_month == "Jul"], aes(x = r_hmsTime, y = normDiffGW)) + + geom_step() + + facet_grid(wDay ~ r_month) + + geom_hline(yintercept = 0, colour = "red") + +compareJunDT <- plotNGDT[wDay == "Weekday" & r_month == "Jul", .(meanNormDiffGW = mean(normDiffGW), + meanDiffGW = mean(diffGW)), keyby = .(r_hmsTime)] +setkey(compareJunDT, r_hmsTime) + +setkey(tempDeltaDT, r_hmsTime) + +compareJunDT <- compareJunDT[tempDeltaDT] + +myPlot <- ggplot2::ggplot(compareJunDT, aes(x = r_hmsTime, )) + + geom_point(aes(y = deltaAllSum, colour = "% point change in acts reported")) + + geom_point(aes(y = 10 * meanNormDiffGW, colour = "Mean normalised GWh delta")) + +addPeakPeriod(myPlot) + +compareJunDT <- compareJunDT[, peakPeriod := "Other"] +compareJunDT <- compareJunDT[r_hmsTime >= hms::as.hms("16:00:00") & + r_hmsTime <= hms::as.hms("20:00:00"), peakPeriod := "16:00 - 20:00 - Evening peak"] +compareJunDT <- compareJunDT[r_hmsTime > hms::as.hms("20:00:00") & + r_hmsTime <= hms::as.hms("23:30:00"), peakPeriod := "20:00 - 23:00 - Later evening"] +compareJunDT <- compareJunDT[r_hmsTime >= hms::as.hms("07:00:00") & + r_hmsTime <= hms::as.hms("08:30:00"), peakPeriod := "07:00 - 08:30 - Morning peak"] +compareJunDT <- compareJunDT[r_hmsTime >= hms::as.hms("11:00:00") & + r_hmsTime <= hms::as.hms("14:30:00"), peakPeriod := "11:00 - 14:30 - Solar peak"] +addPlotStuff <- function(p){ + p <- p + + geom_hline(yintercept = 0, colour = "grey") + + geom_smooth(colour = "black" , se=TRUE) + + geom_point(aes(colour = peakPeriod)) + theme(legend.position = "bottom") + - labs(caption = myCaption, - x = "Hour of the day", - y = eval(yLabel) - ) + theme(legend.title = element_blank()) + + guides(col=guide_legend(ncol=2)) + + facet_grid(. ~ actLabel) + return(p) } +myPlot <- ggplot2::ggplot(compareJunDT, aes(x = meanDiffGW, y = deltaMain)) + + geom_hline(yintercept = 0) + + labs(x = "Mean change in GW", + y = "% point change in recorded half hours") +addPlotStuff(myPlot) + +myPlot <- ggplot2::ggplot(compareJunDT, aes(x = meanNormDiffGW, y = deltaMain)) + + geom_vline(xintercept = 0, colour = "grey") + + labs(x = "Mean change in normalised GW", + y = "% point change in recorded half hours") +addPlotStuff(myPlot) +``` -# reuse aggHhDemandActsInHomeWtDT -myCaption <- "Synthetic MTUS 2014, UK sample (Weighted, at own home)" +## DEMAND: Working age group (16-64) vs retired (65+) -dt <- aggHhDemandActsInHomeWtDT[ba_survey == "2014" & - as.POSIXlt(r_startHalfHour)$hour >= 16 & - as.POSIXlt(r_startHalfHour)$hour < 21 & - inHome == "At own home"] -ba_createLinePlot(dt, - yVar = "pcAnyHHMainWt", - yLabel = "% halfhours (main act)", - dateBreaks = "1 hour" - ) +Weighted table of respondents by age (repeats table above?) -myCaption <- "Synthetic MTUS 2014, UK sample (Weighted, not at own home)" -dt <- aggHhDemandActsInHomeWtDT[ba_survey == "2014" & - as.POSIXlt(r_startHalfHour)$hour >= 16 & - as.POSIXlt(r_startHalfHour)$hour < 21 & - inHome != "At own home"] -ba_createLinePlot(dt, - yVar = "pcAnyHHMainWt", - yLabel = "% halfhours (main act)", - dateBreaks = "1 hour" - ) -``` - -Next we use the 1974 - 2014 changes to present the changes in the peak demand period for weekdays vs Saturdays and Sundays. The first version keeps the at own home/out of home panels as the vertical columns, the second switches them to highlight change. Need to be consistent inthe paper... +```{r Respondents - by age} +t <- svytable(~ ba_survey + ba_age_r, subset(svyMtusW6aUKSurveyDT, ba_age_r != "(0,16]"), round = TRUE, exclude=NULL, na.action=na.pass) -```{r All - Evening peak change by day of the week} -# re-use dtChange1974_2014WtDT -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted)" -ba_plotChange(dtChange1974_2014WtDT[as.POSIXlt(r_startHalfHour)$hour >= 16 & - as.POSIXlt(r_startHalfHour)$hour < 21], dateBreaks = "1 hour", - yLabel = "% point change 1974 - 2014", - facetVars = "inHome ~ r_wday") - -ba_plotChange(dtChange1974_2014WtDT[as.POSIXlt(r_startHalfHour)$hour >= 16 & - as.POSIXlt(r_startHalfHour)$hour < 21], dateBreaks = "1 hour", - yLabel = "% point change 1974 - 2014", - facetVars = "r_wday ~ inHome ") +kable(caption="MTUS 1974-2015 Survey data: age of respondents (weighted, row %)", + round(100*prop.table(t,1),2) +) ``` -### DEMAND: Working age group (16-64) vs retired (65+) - The following table shows the % of respondents aged 16+ in different work status over the period. The decline in the % of those in full time work is partly due to increased numbers of people in the older (retired) age cohorts. ```{r Table of workers - all ages} @@ -1256,12 +1325,12 @@ kable(caption="MTUS 1974-2015 Survey data: employment status (weighted, row %)", This is corrected by the following table which shows the work status of all aged 16-64 in each survey for men and for women. As we can see there has been a general reduction in the % of male respondents in full time work over the period alongside an increase in the % in part time work. In contrast the opposite trends are seen for women with a steady increase in the % in full time work and a similar (but less steep) upward trend in the % in part-time work. Given the ongoing gendered nature of energy-using domestic acitivties (refs) these trends provide an important context to the patterns of time-use we discuss below. ```{r Table of workers by sex 16-64} -# subset to select working age group +# subset to select working age group t <- svytable(~ ba_survey + empstat, subset(svyMtusW6aUKSurveyDT, ba_workingAge == "16-64" & ba_sex == "Male"), round = TRUE, exclude=NULL, na.action=na.pass) -kable(caption="MTUS 1974-2014 Survey data: Male employment status (weighted, row %, all aged 16-65)", +kable(caption="MTUS 1974-2014 Survey data: Male employment status (weighted, row %, all aged 16-64)", round(100*prop.table(t,1),2) ) @@ -1269,7 +1338,7 @@ t <- svytable(~ ba_survey + empstat, subset(svyMtusW6aUKSurveyDT, ba_workingAge == "16-64" & ba_sex == "Female"), round = TRUE, exclude=NULL, na.action=na.pass) -kable(caption="MTUS 1974-2014 Survey data: Female employment status (weighted, row %, all aged 16-65)", +kable(caption="MTUS 1974-2014 Survey data: Female employment status (weighted, row %, all aged 16-64)", round(100*prop.table(t,1),2) ) @@ -1293,27 +1362,11 @@ We run the 'whole day' and 'eveing peak' analysis in direct sequence as one is j > NB: ignores hours < 06:00 to avoid noisy data -```{r 16-64 - setup} - -ba_plotChange16_64 <- function(dt,dateBreaks,yLabel){ - myPlot <- ggplot(dt[as.POSIXlt(r_startHalfHour)$hour > 6], aes(y = deltaMain, x = r_startHalfHour, colour = actLabel, linetype = actLabel)) + - geom_line() + - scale_x_datetime(date_labels = "%H:%M", date_breaks = eval(dateBreaks)) + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - #scale_colour_grey() + - facet_grid(r_wday ~ ba_workingAge) + - theme(legend.title = element_blank()) + - theme(legend.position = "right") + - labs(caption = myCaption, - x = "Hour of the day", - y = eval(yLabel) - ) - return(myPlot) -} +```{r 16_64setup} # this requires creating a new aggregated table # weighted counts -wtActs <- svytable(~ba_survey + r_wday + r_startHalfHour + inHome + ba_workingAge + actLabel, svyhhDemandActsDT, +wtActs <- svytable(~ba_survey + r_wday + halfHourChar + inHome + ba_workingAge + ba_sex + actLabel, svyhhDemandActsDT, round = TRUE, exclude = NULL, na.action=na.pass @@ -1322,15 +1375,15 @@ wtActs <- as.data.table(wtActs) wtActs <- wtActs[, anyHHmainWt := N] # count all half hours -wtAllHHs <- svytable(~ba_survey + r_wday + r_startHalfHour + inHome + ba_workingAge, svyhhDemandActsDT, +wtAllHHs <- svytable(~ba_survey + r_wday + halfHourChar + inHome + ba_workingAge + ba_sex, svyhhDemandActsDT, round = TRUE, exclude = NULL, na.action=na.pass ) wtAllHHs <- as.data.table(wtAllHHs) wtAllHHs <- wtAllHHs[, nHHsWt := N] -setkey(wtActs, ba_survey, r_wday, r_startHalfHour, inHome, ba_workingAge) -setkey(wtAllHHs, ba_survey, r_wday, r_startHalfHour, inHome, ba_workingAge) +setkey(wtActs, ba_survey, r_wday, halfHourChar, inHome, ba_workingAge,ba_sex) +setkey(wtAllHHs, ba_survey, r_wday, halfHourChar, inHome, ba_workingAge,ba_sex) wt <- wtActs[wtAllHHs] wt <- wt[, actLabelChar := as.character(actLabel)] wt <- wt[, pcAnyHHMainWt := 100*(anyHHmainWt/nHHsWt)] @@ -1338,7 +1391,7 @@ wt <- wt[, pcAnyHHMainWt := 100*(anyHHmainWt/nHHsWt)] wt <- wt[, ba_survey := as.integer(ba_survey)] # as.data.table creates char from svy results # fix actLabel factor ordering -wt <- wt[, r_startHalfHour := as.POSIXct(r_startHalfHour)] +wt <- makeHmsTime(wt) aggHhDemandActsInHomeAgeWtDT <- ba_DEMANDreLabelSvyAggActs(wt) # as.data.table creates char from svy results @@ -1347,72 +1400,79 @@ dt1974WtDT <- aggHhDemandActsInHomeAgeWtDT[ba_survey == "1974"] dt1974WtDT <- dt1974WtDT[, pcAnyHHMainWt.1974 := pcAnyHHMainWt] dt2014WtDT <- aggHhDemandActsInHomeAgeWtDT[ba_survey == "2014"] dt2014WtDT <- dt2014WtDT[, pcAnyHHMainWt.2014 := pcAnyHHMainWt] -setkey(dt1974WtDT, r_wday, r_startHalfHour, inHome, ba_workingAge,actLabel) -setkey(dt2014WtDT, r_wday, r_startHalfHour, inHome, ba_workingAge,actLabel) +setkey(dt1974WtDT, r_wday, r_hmsTime, inHome, ba_workingAge,ba_sex,actLabel) +setkey(dt2014WtDT, r_wday, r_hmsTime, inHome, ba_workingAge,ba_sex,actLabel) dtChange1974_2014_workingAgeWtDT <- dt1974WtDT[dt2014WtDT] dtChange1974_2014_workingAgeWtDT <- dtChange1974_2014_workingAgeWtDT[, deltaMain := pcAnyHHMainWt.2014 - pcAnyHHMainWt.1974] +# select the variables we want to plot +changePlotDT <- dtChange1974_2014_workingAgeWtDT[actLabel %like% "Personal" | + actLabel %like% "Work" | + actLabel %like% "Travel" & inHome %like% "Not"| # <- avoid in-home travel! (0) + actLabel %like% "Food" | + actLabel %like% "Media", .(r_hmsTime, inHome, r_wday, actLabel, actLabelChar, pcAnyHHMainWt.1974, pcAnyHHMainWt.2014, deltaMain, ba_workingAge, ba_sex) + ] +# with(changePlotDT, table(inHome, actLabelChar)) +# fiddle around so we can avoid facet by inHome +changePlotDT <- changePlotDT[, actLabelChar := ifelse(actLabel %like% "Work" & + inHome %like% "Not", + "Work (out of home)", actLabelChar)] # <- to distinguish from at home work + +changePlotDT <- changePlotDT[, actLabelChar := ifelse(actLabel %like% "Work" & + inHome %like% "At", + "Work (at home)", actLabelChar)] # <- to distinguish + + +# flag the ones to plot +changePlotDT <- changePlotDT[, plotMe := ifelse(inHome %like% "At" | + actLabel %like% "Travel" | + actLabel %like% "Work", 1, 0)] ``` -Out of home: Fig 4 - -```{r 16-64 - plot day of week change for 1974 - 2014 out of home, fig.height=10, fig.width=8} +16-64 - All activities at home except Travel or where labelled (Figure \ref(fig:deltaPlotDow16_64) - Fig 5 in paper). -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted, Not at own home)" -ba_plotChange16_64(dtChange1974_2014_workingAgeWtDT[inHome != "At own home"], dateBreaks = "2 hours", yLabel = "% point change 1974 - 2014") # plot as normal ggplot (easier for paper writing) +```{r deltaPlotDow16_64, fig.height=8, fig.cap="deltaPlotDow16_64"} +plotDT <- changePlotDT[plotMe == 1 & ba_workingAge == "16-64" & + lubridate::hour(r_hmsTime) >= 6] # <- select after 05:59 +myPlot <- ba_plotChangeDow(plotDT, + actVar = "actLabelChar", + yLabel = "% point change 1974 - 2014") # plot as normal ggplot (easier for paper writing) +yMax <- max(plotDT$deltaMain) +yMin <- min(plotDT$deltaMain) +rectAlpha <- 0.1 +vLineAlpha <- 0.4 +vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette +myTextSize <- 3 +addPeakPeriod(myPlot) + geom_hline(yintercept = 0) + facet_grid(r_wday ~ ba_sex) +ggsave("deltaPlotDow16_64.png", height = 8) ``` +65+ - All activities at home except Travel or where labelled (Figure \ref(fig:deltaPlotDow64m) - Fig 6 in paper). -Essentially repeat the above analysis but for evening peak. - -Out of home evening peak: - -```{r 16-64 - plot day of week change for 1974 - 2014 out of home evening peak} - -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted, Not at own home)" -ba_plotChange16_64(dtChange1974_2014_workingAgeWtDT[inHome != "At own home" & - as.POSIXlt(r_startHalfHour)$hour >= 16 & - as.POSIXlt(r_startHalfHour)$hour <= 21], dateBreaks = "2 hours", yLabel = "% point change 1974 - 2014") # plot as normal ggplot (easier for paper writing) +```{r deltaPlotDow64m, fig.height=8, fig.cap="deltaPlotDow64m"} +plotDT <- changePlotDT[plotMe == 1 & ba_workingAge != "16-64" & + lubridate::hour(r_hmsTime) >= 6] # <- select after 05:59 +myPlot <- ba_plotChangeDow(plotDT, + actVar = "actLabelChar", + yLabel = "% point change 1974 - 2014") # plot as normal ggplot (easier for paper writing) +yMax <- max(plotDT$deltaMain) +yMin <- min(plotDT$deltaMain) +rectAlpha <- 0.1 +vLineAlpha <- 0.4 +vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette +myTextSize <- 3 +addPeakPeriod(myPlot) + geom_hline(yintercept = 0) + facet_grid(r_wday ~ ba_sex) +ggsave("deltaPlotDow64m.png", height = 8) ``` -At home: Fig 5 - -```{r 16-64 - plot day of week change for 1974 - 2014 at own home, fig.height=10, fig.width=8} -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted, At own home)" - -myPlot <- ba_plotChange16_64(dtChange1974_2014_workingAgeWtDT[inHome == "At own home"], dateBreaks = "2 hours", yLabel = "% point change 1974 - 2014") # plot as normal ggplot (easier for paper writing) - -myPlot - - -``` - -At home evening peak - -```{r 16-64 - plot day of week change for 1974 - 2014 at own home evening peak} -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted, At own home)" - -myPlot <- ba_plotChange16_64(dtChange1974_2014_workingAgeWtDT[inHome == "At own home" & - as.POSIXlt(r_startHalfHour)$hour >= 16 & - as.POSIXlt(r_startHalfHour)$hour <= 21], dateBreaks = "2 hours", yLabel = "% point change 1974 - 2014") # plot as normal ggplot (easier for paper writing) - -myPlot - - -``` - -This charts shows: - -> to do - -### Food & Media: Regression analysis +## Detailed Activities: Descriptive analysis However, such descriptive analysis does not enable robust statistical claims regarding change during the evening peak period. To provide such evidence we developed two Poisson models - one for the early evening period from 16:00 to 18:00 on weekdays () and one for 18:00 – 21:00 () on weekdays, reflecting the descriptive analysis/charts above. For each model we first assess the statistical effect of survey year (sub-model one) and then year plus age group (sub-model two) on the number of half hours that the given activity was reported in those periods. @@ -1424,20 +1484,43 @@ Test this using two poisson models - one for 'early' eating and one for 'late' e Also use two logit to test p(any reported) - this tests predictors of reporting any activity in the time period and may be easier to interpret. +```{r plotChange} +deltaPlotDT <- dtChange1974_2014_workingAgeWtDT[r_hmsTime >= hms::as.hms("07:00:00") & + (actLabel == "Food" | actLabel == "Media" | + actLabel %like% "Personal") & + r_wday == "Monday-Friday" & + inHome == "At own home", .(r_hmsTime, deltaMain, actLabel, ba_workingAge, ba_sex)] + +yMax <- max(deltaPlotDT$deltaMain) +yMin <- min(deltaPlotDT$deltaMain) + +myPlot <- ggplot2::ggplot(deltaPlotDT, aes(x = r_hmsTime)) + + geom_line(aes(y = deltaMain, colour = actLabel)) + + geom_hline(yintercept = 0) + + theme(legend.position = "bottom") + + theme(legend.title = element_blank()) + + guides(col=guide_legend(ncol=2)) + + labs(y = "% point change 1974 - 2014", + x = "Time of Day") + + facet_grid(ba_sex ~ ba_workingAge) + +dataNote <- "MTUS 1974 - 2014" +addPeakPeriod(myPlot) +``` + NB: we use interaction terms for survey year & working age to understand how older people's reported activities have changed over time. -Before we start regression analysis, construct line charts showing % reporting this activity per half hour for each survey year - to give a better sense of the trajectory of change than the 1974 -> 2014 total diff. +Before we start regression analysis, construct line charts showing % reporting this activity per half hour for each survey year - to give a better sense of the trajectory of change than the 1974 -> 2014 total diff. (not used in final version of paper) -```{r yearly change - food} -ba_createActLinePlot <- function(dt,yVar,yLabel,dateBreaks){ - ggplot(dt, aes(y = get(yVar), x = r_startHalfHour, - linetype = factor(ba_survey), +```{r allActsYearlyChangeByAgeGender} +ba_createActLinePlot <- function(dt,yVar,yLabel){ + ggplot(dt, aes(y = get(yVar), x = r_hmsTime, colour = factor(ba_survey))) + - geom_line() + scale_colour_grey(start = 0.8, end = 0.2) + # http://ggplot2.tidyverse.org/reference/scale_brewer.html - scale_x_datetime(date_labels = "%H:%M", date_breaks = dateBreaks) + + geom_line() + + scale_colour_grey(start = 0.8, end = 0.2) + # http://ggplot2.tidyverse.org/reference/scale_brewer.html theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + theme(strip.text.y = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - facet_grid(ba_workingAge ~ actLabel) + + facet_grid(. ~ ba_sex) + #scale_fill_grey() + theme(legend.title = element_blank()) + theme(legend.position = "bottom") + @@ -1450,40 +1533,92 @@ ba_createActLinePlot <- function(dt,yVar,yLabel,dateBreaks){ # reuse aggHhDemandActsInHomeWtDT myCaption <- "Synthetic MTUS 2014, UK sample (Weighted, at own home, Mondays-Fridays)" -dt <- aggHhDemandActsInHomeAgeWtDT[(actLabel == "Food" | actLabel == "Media") & +dt <- aggHhDemandActsInHomeAgeWtDT[(actLabel == "Food" | actLabel == "Media" | + actLabel %like% "Personal") & r_wday == "Monday-Friday" & - inHome == "At own home"] - -ba_createActLinePlot(dt, - yVar = "pcAnyHHMainWt", - yLabel = "% halfhours (main act)", - dateBreaks = "1 hour" - ) + inHome == "At own home" & ba_workingAge == "16-64"] # retry as seperate charts for clarity ba_createActLinePlot(dt[actLabel == "Food"], yVar = "pcAnyHHMainWt", - yLabel = "% halfhours (Food as main act)", - dateBreaks = "1 hour" + yLabel = "% halfhours (Food as main act)" ) +ggsave("foodChange.png") ba_createActLinePlot(dt[actLabel == "Media"], yVar = "pcAnyHHMainWt", - yLabel = "% halfhours (Media as main act)", - dateBreaks = "1 hour" + yLabel = "% halfhours (Media as main act)" + ) +ggsave("mediaChange.png") + +ba_createActLinePlot(dt[actLabel %like% "Personal"], + yVar = "pcAnyHHMainWt", + yLabel = "% halfhours (Personal/home care as main act)" + ) +ggsave("personalChange.png") + +dt <- aggHhDemandActsInHomeAgeWtDT[(actLabel == "Travel" | + actLabel %like% "Work") & + inHome != "At own home" & r_wday == "Monday-Friday" & ba_workingAge == "16-64"] + +ba_createActLinePlot(dt[actLabel %like% "Work"], + yVar = "pcAnyHHMainWt", + yLabel = "% halfhours (Work as main act)" + ) +ggsave("workOutOfHomeChange.png") + +ba_createActLinePlot(dt[actLabel %like% "Travel"], + yVar = "pcAnyHHMainWt", + yLabel = "% halfhours (Travel as main act)" ) +ggsave("travelChange.png") ``` + +## Regression analysis + +Change of purpose of models - to estimate statistical effects of being in empstat on p(late food) etc. This uses respondents aged 16-64 only as we are interested in the effects of changing work patterns (primarily) + ```{r set all model data tables} # don't need weighted data for regressions -# flag food ---- -hhDemandActsDT <- hhDemandActsDT[, anyDA_Food_m := ifelse(actLabel == "Food", 1, 0)] +hhDemandActsDT <- makeHmsTime(hhDemandActsDT) # make time for easy selection into periods + +# reverse order of some vars for regressions +# http://www.cookbook-r.com/Manipulating_data/Changing_the_order_of_levels_of_a_factor/ +hhDemandActsDT <- hhDemandActsDT[, ba_sex := as.factor(ba_sex)] +hhDemandActsDT <- hhDemandActsDT[, ba_sex := relevel(ba_sex, "Male")] +hhDemandActsDT <- hhDemandActsDT[, empstat := as.factor(empstat)] +hhDemandActsDT <- hhDemandActsDT[, empstat := relevel(empstat, "not in paid work")] +hhDemandActsDT <- hhDemandActsDT[, ba_workingAge := as.factor(ba_workingAge)] +``` + +### Run Food models + * Poisson (counts of half-hours) + * Logit (any half-hours) + +Model 1: survey year * sex +Model 2: sex * empl for all in 2014 only -foodEarlyDT <- hhDemandActsDT[as.POSIXlt(r_startHalfHour)$hour >= 16 & as.POSIXlt(r_startHalfHour)$hour < 18 & inHome == "At own home" & r_wday == "Monday-Friday", .(sumDA_Food_m = sum(anyDA_Food_m)), by = .(ba_pid, ba_diarypid, ba_survey, ba_workingAge)] # generates a count for poisson +```{r Run food models} +hhDemandActsDT <- hhDemandActsDT[, anyDA_Food_m := ifelse(actLabel == "Food", 1, 0)] +# aggregate food +foodEarlyDT <- hhDemandActsDT[lubridate::hour(r_hmsTime) >= 16 & + r_hmsTime <= hms::as.hms("18:00:00") & + inHome == "At own home" & + r_wday == "Monday-Friday", .(sumDA_Food_m = sum(anyDA_Food_m), + minTime = min(r_hmsTime), + maxTime = max(r_hmsTime)), + by = .(ba_pid, ba_diarypid, ba_survey, ba_sex, ba_workingAge, empstat)] # generates a count for poisson # generate a flag (if we want to use logit) foodEarlyDT <- foodEarlyDT[, anyDA_Food_m := ifelse(sumDA_Food_m > 0, 1, 0)] -foodLateDT <- hhDemandActsDT[as.POSIXlt(r_startHalfHour)$hour >= 18 & as.POSIXlt(r_startHalfHour)$hour < 21 & inHome == "At own home" & r_wday == "Monday-Friday", .(sumDA_Food_m = sum(anyDA_Food_m)), by = .(ba_pid, ba_diarypid, ba_survey, ba_workingAge)] # generates a count +foodLateDT <- hhDemandActsDT[r_hmsTime >= hms::as.hms("18:30:00") & + r_hmsTime <= hms::as.hms("20:30:00") & + inHome == "At own home" & + r_wday == "Monday-Friday", .(sumDA_Food_m = sum(anyDA_Food_m), + minTime = min(r_hmsTime), + maxTime = max(r_hmsTime)), + by = .(ba_pid, ba_diarypid, ba_survey, ba_workingAge, ba_sex, empstat)] # generates a count foodLateDT <- foodLateDT[, anyDA_Food_m := ifelse(sumDA_Food_m > 0, 1, 0)] setkey(foodEarlyDT, ba_survey, ba_diarypid) @@ -1491,94 +1626,49 @@ setkey(foodLateDT, ba_survey, ba_diarypid) setkey(mtusW6aUKSurveyDT, ba_survey, ba_diarypid) # don't need weights for regression analysis # table -t <- foodEarlyDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), by = ba_survey] +t <- foodEarlyDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), keyby = ba_survey] kable(caption = "Obs counts for 'early food' model", t) # table -t <- foodLateDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), by = ba_survey] +t <- foodLateDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), keyby = ba_survey] kable(caption = "Obs counts for 'late food' model", t) -# flag media ---- -hhDemandActsDT <- hhDemandActsDT[, anyDA_Media_m := ifelse(actLabel == "Media", 1, 0)] - -mediaEarlyDT <- hhDemandActsDT[as.POSIXlt(r_startHalfHour)$hour >= 16 & as.POSIXlt(r_startHalfHour)$hour < 18 & inHome == "At own home" & r_wday == "Monday-Friday", .(sumDA_Media_m = sum(anyDA_Media_m)), by = .(ba_pid, ba_diarypid, ba_survey, ba_workingAge)] # generates a count for poisson -# generate a flag (if we want to use logit) -mediaEarlyDT <- mediaEarlyDT[, anyDA_Media_m := ifelse(sumDA_Media_m > 0, 1, 0)] - -mediaLateDT <- hhDemandActsDT[as.POSIXlt(r_startHalfHour)$hour >= 18 & as.POSIXlt(r_startHalfHour)$hour < 21 & inHome == "At own home" & r_wday == "Monday-Friday", .(sumDA_Media_m = sum(anyDA_Media_m)), by = .(ba_pid, ba_diarypid, ba_survey, ba_workingAge)] # generates a count -# generate a flag (if we want to use logit) -mediaLateDT <- mediaLateDT[, anyDA_Media_m := ifelse(sumDA_Media_m > 0, 1, 0)] - -setkey(mediaEarlyDT, ba_survey, ba_diarypid) -setkey(mediaLateDT, ba_survey, ba_diarypid) -setkey(mtusW6aUKSurveyDT, ba_survey, ba_diarypid) # don't need weights for regression analysis - -# table -t <- mediaEarlyDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), by = ba_survey] - -kable(caption = "Obs counts for 'early media' model", t) - -# table -t <- mediaLateDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), by = ba_survey] - -kable(caption = "Obs counts for 'late media' model", t) -``` - -#### Run food models: - - * Poisson (counts of half-hours) - * Logit (any half-hours) - - -```{r Run food models} +# models ---- # Use robust SE (https://stats.idre.ucla.edu/r/dae/poisson-regression/) # We have repeat observations in years where there were multi-day diaries- see: # foodEarlyDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), by = ba_survey] +simpleModel <- "sumDA_Food_m ~ as.factor(ba_survey)*ba_sex" +fullModel <- "sumDA_Food_m ~ as.factor(ba_survey) + ba_sex*empstat" -# Early: Poisson -foodEarlyModelP1rob <- glmrob(formula = sumDA_Food_m ~ as.factor(ba_survey), family="poisson", foodEarlyDT) -foodEarlyModelP2rob <- glmrob(formula = sumDA_Food_m ~ as.factor(ba_survey)*ba_workingAge, family="poisson", foodEarlyDT) - -# Logit -foodEarlyModelL1rob <- glmrob(formula = anyDA_Food_m ~ as.factor(ba_survey), family=binomial(logit), foodEarlyDT) -foodEarlyModelL2rob <- glmrob(formula = anyDA_Food_m ~ as.factor(ba_survey)*ba_workingAge, family=binomial(logit), foodEarlyDT) - -# Late: Poisson -foodLateModelP1rob <- glmrob(formula = sumDA_Food_m ~ as.factor(ba_survey), family="poisson", foodLateDT) -foodLateModelP2rob <- glmrob(formula = sumDA_Food_m ~ as.factor(ba_survey)*ba_workingAge, family="poisson", foodLateDT) - -# Logit -foodLateModelL1rob <- glmrob(formula = anyDA_Food_m ~ as.factor(ba_survey), family=binomial(logit), foodLateDT) -foodLateModelL2rob <- glmrob(formula = anyDA_Food_m ~ as.factor(ba_survey)*ba_workingAge, family=binomial(logit), foodLateDT) +## Poisson +foodEarlyModelP1rob <- glmrob(formula = as.formula(simpleModel), family="poisson", foodEarlyDT) +foodEarlyModelP2rob <- glmrob(formula = as.formula(fullModel), family="poisson", + foodEarlyDT) +foodLateModelP1rob <- glmrob(formula = as.formula(simpleModel), family="poisson", foodLateDT) +foodLateModelP2rob <- glmrob(formula = as.formula(fullModel), family="poisson", + foodLateDT) ``` - The following tables report the regression results more neatly. Early food: +```{r check earlyFood} +summary(foodEarlyDT) +# times +print("From:") +hms::as.hms(min(foodEarlyDT$minTime)) +print("To:") +hms::as.hms(max(foodEarlyDT$maxTime)) -First, the poisson model which predicts counts of half-hours. +``` ```{r Report early food poisson model results with robust SE, results='asis'} stargazer(foodEarlyModelP1rob,foodEarlyModelP2rob, - column.labels = c("Early: Model 1 (poisson)", "Early: Model 2 (poisson)"), - ci = TRUE, - single.row = TRUE, - type = "html", - keep.stat=c("aic", "chi2","ll","n"), # only seems to report n? - star.cutoffs = c(0.05, 0.01, 0.001) - ) -``` - -Second, the logit model which is predicting any food related activities in that period. - -```{r Report early food logit model results with robust SE, results='asis'} -stargazer(foodEarlyModelL1rob,foodEarlyModelL2rob, - column.labels = c("Early: Model 1 (logit)", "Early: Model 2 (logit)"), + column.labels = c("Food Early: Model 1 (poisson)","Food Early: Model 2 (poisson)"), ci = TRUE, single.row = TRUE, type = "html", @@ -1589,11 +1679,18 @@ stargazer(foodEarlyModelL1rob,foodEarlyModelL2rob, Late food: -First, the poisson model which predicts counts of half-hours. +```{r check lateFood} +summary(foodLateDT) +# times +print("From:") +hms::as.hms(min(foodLateDT$minTime)) +print("To:") +hms::as.hms(max(foodLateDT$maxTime)) +``` ```{r Report late food poisson model results with robust SE, results='asis'} stargazer(foodLateModelP1rob,foodLateModelP2rob, - column.labels = c("Late: Model 1 (poisson)", "Late: Model 2 (poisson)"), + column.labels = c("Food Late: Model 1 (poisson)","Food Late: Model 2 (poisson)"), ci = TRUE, single.row = TRUE, type = "html", @@ -1602,74 +1699,74 @@ stargazer(foodLateModelP1rob,foodLateModelP2rob, ) ``` -Second, the logit model which is predicting any food related activities in that period. +### Run Personal/home care models: -```{r Report late food logit model results with robust SE, results='asis'} -stargazer(foodLateModelL1rob,foodLateModelL2rob, - column.labels = c("Late: Model 1 (logit)", "Late: Model 2 (logit)"), - ci = TRUE, - single.row = TRUE, - type = "html", - keep.stat=c("aic", "chi2","ll","n"), # only seems to report n? - star.cutoffs = c(0.05, 0.01, 0.001) - ) -``` + * Poisson (counts of half-hours) + +```{r Run personal models} +hhDemandActsDT <- hhDemandActsDT[, anyDA_Personal_m := ifelse(actLabel %like% "Personal", 1, 0)] +personalEarlyDT <- hhDemandActsDT[r_hmsTime >= hms::as.hms("16:00:00") & + r_hmsTime <= hms::as.hms("18:00:00") & + inHome == "At own home" & + r_wday == "Monday-Friday", .(sumDA_Personal_m = sum(anyDA_Personal_m), + minTime = min(r_hmsTime), + maxTime = max(r_hmsTime)), + by = .(ba_pid, ba_diarypid, ba_survey, ba_workingAge, ba_sex, empstat)] # generates a count for poisson +# generate a flag (if we want to use logit) +personalEarlyDT <- personalEarlyDT[, anyDA_Personal_m := ifelse(sumDA_Personal_m > 0, 1, 0)] + +personalLateDT <- hhDemandActsDT[r_hmsTime >= hms::as.hms("18:30:00") & + r_hmsTime <= hms::as.hms("20:30:00") & + inHome == "At own home" & + r_wday == "Monday-Friday", .(sumDA_Personal_m = sum(anyDA_Personal_m), + minTime = min(r_hmsTime), + maxTime = max(r_hmsTime)), + by = .(ba_pid, ba_diarypid, ba_survey, ba_workingAge, ba_sex, empstat)] # generates a count +# generate a flag (if we want to use logit) +personalLateDT <- personalLateDT[, anyDA_Personal_m := ifelse(sumDA_Personal_m > 0, 1, 0)] -Now repeat for media use. +# table +t <- personalEarlyDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), keyby = ba_survey] +kable(caption = "Obs counts for 'early personal' model", t) -#### Run media models: +# table +t <- personalLateDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), keyby = ba_survey] - * Poisson (counts of half-hours) - * Logit (any half-hours) - -```{r Run media models} +kable(caption = "Obs counts for 'late personal' model", t) +# Models ---- # Use robust SE (https://stats.idre.ucla.edu/r/dae/poisson-regression/) # We have repeat observations in years where there were multi-day diaries- see: -# foodEarlyDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), by = ba_survey] - -# Early: Poisson -mediaEarlyModelP1rob <- glmrob(formula = sumDA_Media_m ~ as.factor(ba_survey), family="poisson", mediaEarlyDT) -mediaEarlyModelP2rob <- glmrob(formula = sumDA_Media_m ~ as.factor(ba_survey)*ba_workingAge, family="poisson", mediaEarlyDT) +simpleModel <- "sumDA_Personal_m ~ as.factor(ba_survey)*ba_sex" +fullModel <- "sumDA_Personal_m ~ as.factor(ba_survey) + ba_sex*empstat" -# Logit -mediaEarlyModelL1rob <- glmrob(formula = anyDA_Media_m ~ as.factor(ba_survey), family=binomial(logit), mediaEarlyDT) -mediaEarlyModelL2rob <- glmrob(formula = anyDA_Media_m ~ as.factor(ba_survey)*ba_workingAge, family=binomial(logit), mediaEarlyDT) - -# Late: Poisson -mediaLateModelP1rob <- glmrob(formula = sumDA_Media_m ~ as.factor(ba_survey), family="poisson", mediaLateDT) -mediaLateModelP2rob <- glmrob(formula = sumDA_Media_m ~ as.factor(ba_survey)*ba_workingAge, family="poisson", mediaLateDT) - -# Logit -mediaLateModelL1rob <- glmrob(formula = anyDA_Media_m ~ as.factor(ba_survey), family=binomial(logit), mediaLateDT) -mediaLateModelL2rob <- glmrob(formula = anyDA_Media_m ~ as.factor(ba_survey)*ba_workingAge, family=binomial(logit), mediaLateDT) +personalEarlyModelP1rob <- glmrob(formula = as.formula(simpleModel), family="poisson", personalEarlyDT) +personalEarlyModelP2rob <- glmrob(formula = as.formula(fullModel), family="poisson", + personalEarlyDT) +personalLateModelP1rob <- glmrob(formula = as.formula(simpleModel), family="poisson", personalLateDT) +personalLateModelP2rob <- glmrob(formula = as.formula(fullModel), family="poisson", + personalLateDT) ``` - The following tables report the regression results more neatly. -Early media: +Early personal: -First, the poisson model which predicts counts of half-hours. +```{r checkEarlyPersonal} +summary(personalEarlyDT) +# times +print("From:") +hms::as.hms(min(personalEarlyDT$minTime)) +print("To:") +hms::as.hms(max(personalEarlyDT$maxTime)) -```{r Report early media poisson model results with robust SE, results='asis'} -stargazer(mediaEarlyModelP1rob,mediaEarlyModelP2rob, - column.labels = c("Early: Model 1 (poisson)", "Early: Model 2 (poisson)"), - ci = TRUE, - single.row = TRUE, - type = "html", - keep.stat=c("aic", "chi2","ll","n"), # only seems to report n? - star.cutoffs = c(0.05, 0.01, 0.001) - ) ``` -Second, the logit model which is predicting any media related activities in that period. - -```{r Report early media logit model results with robust SE, results='asis'} -stargazer(mediaEarlyModelL1rob,mediaEarlyModelL2rob, - column.labels = c("Early: Model 1 (logit)", "Early: Model 2 (logit)"), +```{r early personal, results='asis'} +stargazer(personalEarlyModelP1rob,personalEarlyModelP2rob, + column.labels = c("Personal Early: Model 1 (poisson)", "Personal Early: Model 2 (poisson)"), ci = TRUE, single.row = TRUE, type = "html", @@ -1678,26 +1775,19 @@ stargazer(mediaEarlyModelL1rob,mediaEarlyModelL2rob, ) ``` -Late media: - -First, the poisson model which predicts counts of half-hours. - -```{r Report late media poisson model results with robust SE, results='asis'} -stargazer(mediaLateModelP1rob,mediaLateModelP2rob, - column.labels = c("Late: Model 1 (poisson)", "Late: Model 2 (poisson)"), - ci = TRUE, - single.row = TRUE, - type = "html", - keep.stat=c("aic", "chi2","ll","n"), # only seems to report n? - star.cutoffs = c(0.05, 0.01, 0.001) - ) +Late personal: +```{r check laterPersonal} +summary(personalLateDT) +# times +print("From:") +hms::as.hms(min(personalLateDT$minTime)) +print("To:") +hms::as.hms(max(personalLateDT$maxTime)) ``` -Second, the logit model which is predicting any media related activities in that period. - -```{r Report late media logit model results with robust SE, results='asis'} -stargazer(mediaLateModelL1rob,mediaLateModelL2rob, - column.labels = c("Late: Model 1 (logit)", "Late: Model 2 (logit)"), +```{r late personal, results='asis'} +stargazer(personalLateModelP1rob,personalLateModelP2rob, + column.labels = c("Personal Late: Model 1 (poisson)", "Personal Late: Model 2 (poisson)"), ci = TRUE, single.row = TRUE, type = "html", @@ -1706,287 +1796,70 @@ stargazer(mediaLateModelL1rob,mediaLateModelL2rob, ) ``` -#### Detailed plots (misc - for various presentations) - -We now produce a few more detailed plots (for edf presentation). - - -```{r food related by working age} -foodChangeAgeDT <- dtChange1974_2014_workingAgeWtDT[actLabel == "Food"] - -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Weighted, food related)" -ba_createDetailedChangeAgePlot(foodChangeAgeDT) - -myCaption <- "Synthetic MTUS 2014, UK sample (Weighted, food related, in own home)" -foodAgeHomeDT <- aggHhDemandActsInHomeAgeWtDT[actLabel == "Food" & inHome == "At own home" & ba_survey == 2014] -ba_createDetailedAgePlot(foodAgeHomeDT) - -``` - -## Specific activity analysis - -### Car travel ending at home - -This section analyses the patterns of car trips that end at home on the assumption that this might predict the timing of 'charging load' in the context of a direct substitution from petrol/diesel vehicles to EVs. Of course it is likely that car-use practices will evolve to reflect the new affordances of EVs but we do not attempt to model such change here. - -```{r car use ending at home - data setup} -# by day of week & year - -# create suitable day of week var ---- -synthW6aEpsDT <- synthW6aEpsDT[!is.na(r_dow), r_wday := "Monday-Friday"] -synthW6aEpsDT <- synthW6aEpsDT[, r_wday := ifelse(r_dow == "Saturday", - "Saturday", - r_wday)] -synthW6aEpsDT <- synthW6aEpsDT[, r_wday := ifelse(r_dow == "Sunday", - "Sunday", - r_wday)] - -# create a table of car use half hours and the half hour directly afterwards (to see where it is) -synthW6aEpsDT <- synthW6aEpsDT[, - carUseJustEndedAtHome := ifelse(shift(mtrav, type = "lag") == "travel by car etc" & - eloc == "at own home", - 1, # previous half hour had car use, this half hour we are not travelling - 0) - ] - -# test incidence -dt <- synthW6aEpsDT[, .("% halfhours" = 100*mean(carUseJustEndedAtHome, na.rm = TRUE)), - keyby = .(ba_survey,r_wday) - ] - -kable(caption = "% halfhours which followed a car trip ending at home", - dt) - -setkey(synthW6aEpsDT, ba_survey, ba_pid, ba_diarypid) -setkey(mtusW6aUKSurveyDT, ba_survey, ba_pid, ba_diarypid) - -``` - -First we test the unweighted patterns to check the patterns look sensible. - -```{r test unweighted patterns for confidence} -# test pattern -dt <- synthW6aEpsDT[mtusW6aUKSurveyDT][, .(pcHH = 100*mean(carUseJustEndedAtHome)), - keyby = .(ba_survey,r_wday, r_startHalfHour, ba_workingAge) - ] - -myCaption <- "Synthetic MTUS 1974 - 2014, UK sample (Un-weighted)" -ggplot(dt[as.POSIXlt(r_startHalfHour)$hour > 6], aes(x = r_startHalfHour, - y = pcHH, - colour = as.factor(ba_survey), - linetype = as.factor(ba_survey) - ) - ) + - geom_line() + - facet_grid(ba_workingAge ~ r_wday) + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - #scale_colour_grey() + - theme(legend.title = element_blank()) + - theme(legend.position = "bottom") + - labs(caption = myCaption, - x = "Time of day", - y = "% half-hours where car trip just ended @ home" - ) -``` - -The above unweighted chart appears to show virtually no car trips ending at home outside weekdays in 1974. However the following table shows that such acts are extremely rare so the result is correct. We note also the unspecified transport rates higher in 1974 suggesting that at least some car use may not be classified as such. - -```{r check weekend travel 1974} -t <- with(synthW6aEpsDT[r_wday != "Monday-Friday"], - table(mtrav, ba_survey) - ) -kable(caption = "Proportion of weekend half hours reporting car travel (unweighted)", - round(prop.table(t,2)*100,2) - ) -``` - -Now the weighted version. First we show the overall results for the whole population and we exclude 1974 for the purposes of clarity. - -```{r car trips ending at home - data set up - weighted} -# test pattern weighted ---- -setkey(synthW6aEpsDT, ba_survey, ba_pid, ba_diarypid) -setkey(mtusW6aUKSurveyDT, ba_survey, ba_pid, ba_diarypid) - -synthW6aEpsWtDT <- merge(synthW6aEpsDT, mtusW6aUKSurveyDT[, .(ba_survey, - ba_pid, - ba_diarypid, - ba_age_r, - ba_workingAge, - diaryWeight, - indWeight) - ] - ) - -# created weighted version ---- -svySynthW6aEps <- svydesign(ids = ~ba_diarypid, - weights = ~diaryWeight, - data = synthW6aEpsWtDT[!is.na(diaryWeight)] - ) - -ba_getWeightedDT <- function(des,form,byVars){ - # expects DT, byVars and a formula - # the byVars and the formula need to match - dtwTmp <- as.data.table(svytable(eval(form), svySynthW6aEps)) - - dtwFinal <- dtwTmp[, .(nHH = sum(N), - nHHmain = ifelse(carUseJustEndedAtHome == 1, N,0)), - keyby = eval(byVars)] - - # now remove the ones with nHHmain == 0 - dtwFinal <- dtwFinal[nHHmain != 0] - dtwFinal <- dtwFinal[, pcHH := 100*(nHHmain/nHH)] - dtwFinal <- dtwFinal[, r_startHalfHour := as.POSIXct(r_startHalfHour)] # why why why - return(dtwFinal) -} - -form <- as.formula(paste0("~ carUseJustEndedAtHome + ba_survey + r_startHalfHour + r_wday")) -byVars = c("ba_survey", "r_startHalfHour", "r_wday") -weightedDT <- ba_getWeightedDT(svySynthW6aEps, - form, - byVars - ) -form <- as.formula(paste0("~ carUseJustEndedAtHome + ba_survey + r_startHalfHour + r_wday + ba_workingAge")) -byVars = c("ba_survey", "r_startHalfHour", "r_wday", "ba_workingAge") -weightedByWorkingAgeDT <- ba_getWeightedDT(svySynthW6aEps, form,byVars) +### Run Media models -form <- as.formula(paste0("~ carUseJustEndedAtHome + ba_survey + r_startHalfHour + r_wday + season")) -byVars = c("ba_survey", "r_startHalfHour", "r_wday", "season") -weightedBySeasonDT <- ba_getWeightedDT(svySynthW6aEps, form,byVars) - -``` + * Poisson (counts of half-hours) + +test decrease in media at peak time +```{r Run media models} +hhDemandActsDT <- hhDemandActsDT[, anyDA_Media_m := ifelse(actLabel %like% "Media", 1, 0)] +mediaPeakDT <- hhDemandActsDT[r_hmsTime >= hms::as.hms("16:00:00") & + r_hmsTime <= hms::as.hms("18:00:00") & + inHome == "At own home" & + r_wday == "Monday-Friday", .(sumDA_Media_m = sum(anyDA_Media_m), + minTime = min(r_hmsTime), + maxTime = max(r_hmsTime)), + by = .(ba_pid, ba_diarypid, ba_survey, ba_workingAge, ba_sex, empstat)] # generates a count for poisson +# generate a flag (if we want to use logit) +mediaPeakDT <- mediaPeakDT[, anyDA_Media_m := ifelse(sumDA_Media_m > 0, 1, 0)] -```{r car trips ending at home - all weighted } -myCaption <- "Synthetic MTUS 1985 - 2014, UK sample (Weighted, 1974 excluded for clarity)" -ggplot(weightedDT[ba_survey != "1974" & as.POSIXlt(r_startHalfHour)$hour > 6], aes(x = r_startHalfHour, - y = pcHH, - colour = as.factor(ba_survey), - linetype = as.factor(ba_survey) - ) - ) + - geom_line() + - facet_grid(. ~ r_wday) + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - #scale_colour_grey() + - theme(legend.title = element_blank()) + - theme(legend.position = "bottom") + - labs(caption = myCaption, - x = "Time of day", - y = "% half-hours where car trip just ended @ home" - ) -``` +# table +t <- mediaPeakDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), keyby = ba_survey] -Interesting that the weight makes a bit of difference here. +kable(caption = "Obs counts for 'peak media' model", t) -Just 2014. +# Models ---- +# Use robust SE (https://stats.idre.ucla.edu/r/dae/poisson-regression/) +# We have repeat observations in years where there were multi-day diaries- see: +# foodEarlyDT[, .(nObs = .N, nPeople = uniqueN(ba_pid) , nDiaries = uniqueN(ba_diarypid)), by = ba_survey] -```{r 2014 patterns only - all} -ggplot(weightedDT[ba_survey == "2014" & as.POSIXlt(r_startHalfHour)$hour > 6], - aes(x = r_startHalfHour, - y = pcHH, - colour = r_wday, - linetype = r_wday - ) -) + - geom_line() + - #facet_grid(ba_workingAge ~ r_wday) + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - #scale_colour_grey() + - theme(legend.title = element_blank()) + - theme(legend.position = "bottom") + - labs(caption = myCaption, - x = "Time of day", - y = "% half-hours where car trip just ended @ home" - ) +simpleModel <- "sumDA_Media_m ~ as.factor(ba_survey)*ba_sex" +fullModel <- "sumDA_Media_m ~ as.factor(ba_survey) + ba_sex*empstat" +mediaPeakModelP1rob <- glmrob(formula = as.formula(simpleModel), family="poisson", mediaPeakDT) +mediaPeakModelP2rob <- glmrob(formula = as.formula(fullModel), family="poisson", mediaPeakDT) ``` -Now repeat but show different patterns for those aged 16-64 vs 65+, again excluding 1974. - -```{r car trips ending at home - 16-64 vs 65+ weighted} -myCaption <- "Synthetic MTUS 1985 - 2014, UK sample (Weighted, 1974 excluded for clarity)" -ggplot(weightedByWorkingAgeDT[ba_survey != "1974" & as.POSIXlt(r_startHalfHour)$hour > 6], - aes(x = r_startHalfHour, - y = pcHH, - colour = as.factor(ba_survey), - linetype = as.factor(ba_survey) - ) -) + - geom_line() + - facet_grid(ba_workingAge ~ r_wday) + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - #scale_colour_grey() + - theme(legend.title = element_blank()) + - theme(legend.position = "bottom") + - labs(caption = myCaption, - x = "Time of day", - y = "% half-hours where car trip just ended @ home" - ) -``` +The following tables report the regression results more neatly. -```{r 2014 patterns only - by age group} -ggplot(weightedByWorkingAgeDT[ba_survey == "2014" & as.POSIXlt(r_startHalfHour)$hour > 6], - aes(x = r_startHalfHour, - y = pcHH, - colour = r_wday, - linetype = r_wday - ) -) + - geom_line() + - facet_grid(. ~ ba_workingAge) + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - #scale_colour_grey() + - theme(legend.title = element_blank()) + - theme(legend.position = "bottom") + - labs(caption = myCaption, - x = "Time of day", - y = "% half-hours where car trip just ended @ home" - ) +Late media: +```{r check lateMedia} +summary(mediaPeakDT) +# times +print("From:") +hms::as.hms(min(mediaPeakDT$minTime)) +print("To:") +hms::as.hms(max(mediaPeakDT$maxTime)) ``` -Now repeat but show different patterns for different seasons, excluding 1974 & 1985 for clarity (as some months not defined or have so few cases they skew the charts). - -```{r car trips ending at home - by season weighted} -myCaption <- "Synthetic MTUS 1985 - 2014, UK sample (Weighted, 2014 only)" -ggplot(weightedBySeasonDT[ba_survey == "2014" & as.POSIXlt(r_startHalfHour)$hour > 6], - aes(x = r_startHalfHour, - y = pcHH, - colour = season, - linetype = season - ) -) + - geom_line() + - facet_grid(. ~ r_wday) + - scale_x_datetime(date_labels = "%H:%M", date_breaks = "2 hours") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + - #scale_colour_grey() + - theme(legend.title = element_blank()) + - theme(legend.position = "bottom") + - labs(caption = myCaption, - x = "Time of day", - y = "% half-hours where car trip just ended @ home" - ) +```{r Report late media poisson model results with robust SE, results='asis'} +stargazer(mediaPeakModelP1rob,mediaPeakModelP2rob, + column.labels = c("Media Peak: Model 1 (poisson)", "Media Peak: Model 2 (poisson)"), + ci = TRUE, + single.row = TRUE, + type = "html", + keep.stat=c("aic", "chi2","ll","n"), # only seems to report n? + star.cutoffs = c(0.05, 0.01, 0.001) + ) ``` -to be developed further. - -### Media & ICT use (for Janine/Mike) - -> update a comparative graph over time, and the breakdown of patterns by season and day of week. Both for TV and computer/devices - - -Some patterns can be discerned from the 'DEAMND acts' charts above. - +# Run To Here ```{r runToHere} ```