diff --git a/CER-data-processing-original.do b/CER-data-processing-original.do
index 75c4141ec758b8ca2d2ad6c3552952a4ab54ee3c..e4a5275b1f702f2063fe3c36e75e360ff6718809 100644
--- a/CER-data-processing-original.do
+++ b/CER-data-processing-original.do
@@ -229,18 +229,41 @@ drop date ds_halfhour halfhour hour mins sec ds ts_ds
 
 compress
 
+tab s_dow, mi
+
 * test to see if missing half hours and ids
 
 xtset ID s_datetime, delta(30 minutes)
 
-* possibly
+* possibly - there are some gaps (missing ids/times within series) and it is unbalanced (missing id/time combinations at each end)
 
-* expand to all 30 seconds between the first and last obs for each hubid
-* (increases file size quite a bit)
-* , full -> imputes ALL missing periods
+* expand to all 1/2 hours between the first and last obs for each hubid - increases file size quite a bit
+* , full -> this option imputes ALL missing periods - greatly inflates the file size so only use if necessary
 * puts . into any missing var - fix that later
 tsfill
 
+tab s_dow, mi
+
+gen is_missing = 0
+replace is_missing = 1 if s_dow == .
+
+tab is_missing
+
+* drop the derived time variables which will have missing values in the gaps we have filled
+drop s_date s_dow s_halfhour
+* rebuild them from s_datetime
+* NB: if we did not specify the 'full' option this will only impute missing datetimes
+* between the first & last ID observation - so it fills in gaps, it does NOT fill up all
+* possible datetimes from the start to the end of the sample
+gen double s_dow = dow(dofc(s_datetime))
+gen double s_date = dofc(s_datetime) // dofc() already returns the daily date; no need to rebuild via mdy()
+format s_date %td
+gen double s_halfhour = hh(s_datetime)*3600000 + mm(s_datetime)*60000 + ss(s_datetime)*1000 // ms since midnight so %tc displays clock time - NOTE(review): confirm original s_halfhour used this encoding
+format s_halfhour %tc
+* so there are 96 missing. Funnily enough that is 2 * 48
+
+compress
+
 save "$odfiles/processed/HH2009_long_filled.dta", replace