ONS-TU-2005-convert-to-long.do 6.06 KB
Newer Older
1
**************************************************************
Ben Anderson's avatar
Ben Anderson committed
2
3
4
5
* Process ONS 2005 time-use data to:
* - long format
* - set stata dates/times 
* - separate time diary data from survey/aggregate data
6

Ben Anderson's avatar
Ben Anderson committed
7
8
* - data available from: http://discover.ukdataservice.ac.uk/catalogue/?sn=5592

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/*   

Copyright (C) 2014  University of Southampton

Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License 
(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

#YMMV - http://en.wiktionary.org/wiki/YMMV

*/

* start from an empty session so nothing from a previous run is left in memory
clear all

* change these to run this script on a different PC
Ben Anderson's avatar
Ben Anderson committed
33
* machine-specific root folder; the data paths used by this script are built from it
local where "~/Documents/Work"
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
* ---- locations ----------------------------------------------------------
* project folder for this survey (raw and processed files sit below it)
local proot "`where'/Data/Social Science Datatsets/Time Use 2005"
* folder holding the time-use diary data
local dpath "`proot'/UKDA-5592-stata8-v2/stata8"

* ---- version tag (used in the output file names) -------------------------
* the original run used:
* local v = "v1.0"
* now using the updated UKDA time use file (v2) June 2007, (2nd Edition),
* which has more detailed codes
* http://discover.ukdataservice.ac.uk/catalogue/?sn=5592
local v = "v2.0"

* NOTE(review): do_collapse is set here but not referenced anywhere in the visible script
local do_collapse 1

* ---- session set-up -------------------------------------------------------
* run straight through without waiting for user input
set more off
* close a log if one is open; -capture- suppresses the error when none is
capture log close

* ---- load the diary data ----------------------------------------------------
use "`dpath'/timeusefinal_for_archive2.dta", clear

* day of week with Monday = 1 ... Sunday = 7
* (according to the userguide, diaryday code 1 might be Sunday!)
recode diaryday (1=7) (2=1) (3=2) (4=3) (5=4) (6=5) (7=6), gen(ba_dow)
label variable ba_dow "Day of week (from diaryday)"
label define ba_dow 1 "Monday" 2 "Tuesday" 3 "Wednesday" 4 "Thursday" 5 "Friday" 6 "Saturday" 7 "Sunday"
label values ba_dow ba_dow

* check the mapping by eye
tabulate diaryday ba_dow

* ---- survey-only extract ------------------------------------------------------
* everything except the time use variables - can be merged back in later
local survey_stem "`proot'/processed/timeusefinal_for_archive_survey_`v'"
preserve
	drop pact* sact* lact* aprim* asec* p_* s_* loc* comp*
	compress
	save "`survey_stem'.dta", replace
	* and a .csv copy for R
	outsheet using "`survey_stem'.csv", comma nolabel replace
restore

* ---- diary-only data, wide -> long ---------------------------------------------
* keep identifiers, weight, month, day of week and the three diary series
keep serial net_wgt month ba_dow pact* sact* lact*

* one row per respondent per diary slot; stata time variables are set up afterwards
reshape long pact sact lact, i(serial)

* the slot index created by reshape: values 1 -> 144 (10 minute slots)
rename _j t_slot
87
88
89
90
91
92
93
94
95
96
97
98
99
100
* shift slots to 0 -> 143 so the time & date arithmetic is easier
replace t_slot = t_slot - 1

* hours elapsed since the first slot (6 ten-minute slots per hour);
* a value of 24 is folded back to 0
* NOTE(review): for t_slot in 0 -> 143 the quotient tops out at 23, so the fold looks unreachable
generate t_hourfromslot = cond(floor(t_slot/6) == 24, 0, floor(t_slot/6))

* clock hour: the diary starts at 04:00, and anything past 23 wraps round to 0 -> 3
* NB this puts > 00:00 at the START of the diary day (same date, earlier clock time)
*    - remember this if doing sequences through 04:00
*    - some charts will show discontinuities at 04:00
generate t_hour = cond(t_hourfromslot + 4 > 23, t_hourfromslot + 4 - 24, t_hourfromslot + 4)
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203

* minute within the hour at which each 10 minute slot STARTS (0, 10, ... 50)
gen t_min = 10 * mod(t_slot, 6)

* fix dates
* each diary is pinned to an ASSUMED first full week of its month in 2005:
*   month 1 = Feb  : Monday was the 7th
*   month 2 = June : Monday was the 6th
*   month 3 = Sept : Monday was the 5th
*   month 4 = Nov  : Monday was the 7th
gen t_day = ba_dow + 6 if inlist(month, 1, 4)
replace t_day = ba_dow + 5 if month == 2
replace t_day = ba_dow + 4 if month == 3

* NOTE(review): t_hour is already wrapped into 0 -> 23 where it is created, so the
* old "t_hour >= 24 -> make it tomorrow" step here could never fire and has been removed.
* The result (unchanged from before) is that slots after midnight (t_slot >= 120) keep
* the date on which the diary STARTED. If true calendar dates are wanted, add:
*   replace t_day = t_day + 1 if t_slot >= 120
* and check anything downstream that relies on the current behaviour.

gen t_month = 2 if month == 1
replace t_month = 6 if month == 2
replace t_month = 9 if month == 3
replace t_month = 11 if month == 4

* full stata date-time (year fixed at 2005, seconds at 0)
gen double s_datetime = mdyhms(t_month, t_day, 2005, t_hour, t_min, 0)
format s_datetime %tc
lab var s_datetime "Date & time slot starts"

* day of week derived from the stata date (0 = Sunday)
gen s_dow = dow(dofc(s_datetime))
lab def s_dow 0 "Sunday" 1 "Monday" 2 "Tuesday" 3 "Wednesday" 4 "Thursday" 5 "Friday" 6 "Saturday"
lab var s_dow "Day of week (STATA form)"
lab val s_dow s_dow
* NB: ba_dow is the day the diary started. Because after-midnight slots keep the
* diary-start date (see the note above), s_dow should currently agree with ba_dow
* on every row, apart from Sunday being coded 0 rather than 7.

* time of day only (a "fake" stata time: the date part is 01jan1960)
* hms() is used rather than building an "h:m" string and parsing it with clock()
gen double s_starttime = hms(t_hour, t_min, 0)
format s_starttime %tcHH:MM
lab var s_starttime "Time slot starts"

* the half hour each slot falls in: minutes 0-29 -> :00, 30-59 -> :30
* (replaces a recode whose targets were the quoted strings "00" and "30";
*  recode's documented rule form takes numeric targets)
gen double s_halfhour = hms(t_hour, 30 * floor(t_min/30), 0)
format s_halfhour %tcHH:MM
lab var s_halfhour "Time of day (half hours)"

lab var lact "Location"
lab var pact "Primary activity"
lab var sact "Secondary activity"

lab var t_slot "Diary slot (144 * 10 mins)"
lab var t_month "Month diary completed"

* eyeball the first few rows
li t_slot month t_month ba_dow s_* in 1/10

* run checks
tab month t_month
tab s_dow ba_dow

* flag slots where location is coded -1 (missing)
gen missing_loc = (lact == -1)
lab var missing_loc "Location missing"

* flag slots where the secondary activity is coded -1 (missing)
gen missing_sec = (sact == -1)
lab var missing_sec "Secondary act missing"

* drop the working variables; only ids, weight, slot, month, s_* and the diary series are saved
keep serial net_wgt t_slot t_month s_* pact sact lact missing_loc missing_sec

order serial net_wgt t_month t_slot s_*

* declare the panel: one series per respondent, observations 10 minutes apart
xtset serial s_datetime, delta(10 minutes)

compress

* save it!
save "`proot'/processed/timeusefinal_for_archive_diary_long_`v'.dta", replace
* save a .csv version for R
outsheet using "`proot'/processed/timeusefinal_for_archive_diary_long_`v'.csv", comma nolabel replace


di "* -> done!"