diff --git a/CER-data-processing-samples.do b/CER-data-processing-samples.do
new file mode 100644
index 0000000000000000000000000000000000000000..928188436a08939622cd0e0a8969300add53b450
--- /dev/null
+++ b/CER-data-processing-samples.do
@@ -0,0 +1,80 @@
+/*  
+**************************************************************
+* Data preparation for ESRC Transformative project
+* - Using the Commission for Energy Regulation (CER)'s Irish Smart Meter Trial data
+*   - http://www.ucd.ie/issda/data/commissionforenergyregulationcer/
+
+* processes the original data for further use:
+* - sample allocation: reads the original .csv file and saves it as a Stata .dta file
+
+* This work was funded by RCUK through the ESRC's Transformative Social Science Programme via the
+* "Census 2022: Transforming Small Area Socio-Economic Indicators through 'Big Data'" Project 
+* - http://gtr.rcuk.ac.uk/project/2D2CD798-4F04-4399-B1AF-D810A233DD21
+* - http://www.energy.soton.ac.uk/tag/census2022/
+ 
+Copyright (C) 2014  University of Southampton
+
+Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
+	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License 
+(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+#YMMV - http://en.wiktionary.org/wiki/YMMV
+
+*/
+
+* root folder that every other path below is built on - edit for your own machine
+global where "~/Documents/Work"
+* project root folder
+global proot "$where/Data/CER Smart Metering Project"
+
+* data folder - the original and processed subfolders used below sit under it
+global dpath "$proot/data"
+
+* folder the log file is written to
+global logpath "$proot/data/processed/logs"
+* version tag appended to the log file name and the saved data file name
+global version "v1"
+* do not pause the output with more prompts
+set more off
+
+clear all
+* close any log that is still open (capture suppresses the error if none is open)
+capture log close
+
+log using "$logpath/CER-data-processing-samples-$version.smcl", replace
+* reset all timers, then time the rest of the run with timer 1
+timer clear
+
+timer on 1
+
+************************************
+************************************
+* load the sample allocation file (the code column holds the sample group)
+insheet using "$dpath/original/CER_both/CER Electricity Revised March 2012/SME and Residential allocations.csv", names comma clear
+
+********
+* sample allocations
+* code 1 = Residential, 2 = SME, 3 = Other - any other value is left missing
+gen ba_sample = code if inlist(code, 1, 2, 3)
+
+lab var ba_sample "Sample membership"
+lab def ba_sample 1 "Residential" 2 "SME" 3 "Other"
+* attach the value label - defining it alone does not apply it to the variable
+lab val ba_sample ba_sample
+
+save "$dpath/processed/CER_sample_allocation_$version.dta", replace
+
+timer off 1
+di "Time taken:"
+timer list 1
+
+log close