diff --git a/LSOA-Domestic/extract-DECC-LSOA-energy-data-from-excel.do b/LSOA-Domestic/extract-DECC-LSOA-energy-data-from-excel.do
index b916c229d51194b9209a2d2ce11bb1389d3e1d32..4da44694dfca70c39ae1018bb33281ed1d824596 100644
--- a/LSOA-Domestic/extract-DECC-LSOA-energy-data-from-excel.do
+++ b/LSOA-Domestic/extract-DECC-LSOA-energy-data-from-excel.do
@@ -1,11 +1,27 @@
-* Extract data from DECC LSOA level domestic energy consumption data:
-* https://www.gov.uk/government/collections/mlsoa-and-llsoa-electricity-and-gas-estimates
-
-* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
-* b.anderson@soton.ac.uk
-* (c) University of Southampton
-* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
-* http://creativecommons.org/licenses/by-nc/4.0/
+*******************************************
+* Script to:
+* - Extract DECC LSOA-level domestic energy consumption data from Excel (https://www.gov.uk/government/collections/mlsoa-and-llsoa-electricity-and-gas-estimates)
+
+/*   
+
+Copyright (C) 2014  University of Southampton
+
+Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
+	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License 
+(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+#YMMV - http://en.wiktionary.org/wiki/YMMV
+
+*/
 
 clear all
 
diff --git a/NEED/analyse-NEED-EULF-2014-descriptives.do b/NEED/analyse-NEED-EULF-2014-descriptives.do
index b33f25bdbf1357d9841e80199cde0ce49f70da99..2b0b524b69214b2acc14d8a566c4b086bb043a53 100644
--- a/NEED/analyse-NEED-EULF-2014-descriptives.do
+++ b/NEED/analyse-NEED-EULF-2014-descriptives.do
@@ -1,17 +1,31 @@
-* Script to analyse DECC's NEED data to:
-* examine distributions etc
-
-* NB this script uses 2 data files derived from the original data using the 'process' script
+* Script to:
+* - analyse DECC's EULF 2014 NEED data to examine distributions etc 
 
 * Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
 * http://discover.ukdataservice.ac.uk/catalogue/?sn=7518
 
-* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
-* b.anderson@soton.ac.uk
-* (c) University of Southampton
+* NB this script uses 2 data files derived from the original data using the 'process' script
+
+/*   
+
+Copyright (C) 2014  University of Southampton
+
+Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
+	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License 
+(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+#YMMV - http://en.wiktionary.org/wiki/YMMV
 
-* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
-* http://creativecommons.org/licenses/by-nc/4.0/
+*/
 
 clear all
 
diff --git a/NEED/analyse-NEED-EULF-2014-models-v1.1.do b/NEED/analyse-NEED-EULF-2014-models-v1.1.do
deleted file mode 100644
index cf12a97cb3131e7bc322632392d8c0c9f1d363f7..0000000000000000000000000000000000000000
--- a/NEED/analyse-NEED-EULF-2014-models-v1.1.do
+++ /dev/null
@@ -1,189 +0,0 @@
-* Script to analyse DECC's NEED data to:
-* investigate % variance of energy consumption due to dwelling type variables as a way to infer the % of variance due to people
-
-* NB this script uses 2 data files derived from the original data using the 'process' script
-
-* Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
-* http://discover.ukdataservice.ac.uk/catalogue/?sn=7518
-
-* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
-* b.anderson@soton.ac.uk
-* (c) University of Southampton
-
-* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
-* http://creativecommons.org/licenses/by-nc/4.0/
-
-clear all
-
-capture noisily log close
-
-* written for Mac OSX - remember to change filesystem delimiter for other platforms
-local home "/Users/ben/Documents"
-local proot "`home'/Work/Data/Social Science Datatsets/DECC"
-* for clam
-* local proot "`home'/Work/NEED"
-local dpath "`proot'/NEED/End User Licence File 2014/processed"
-local rpath "`proot'/results/NEED/"
-
-* local verrsion "1.0"
-* initial models - all households for electricity models
-
-local verrsion "1.1"
-* restrict to gas only households to avoid complications of:
-* - primary electric heating (presumably)
-* - oil heating
-
-set more off
-
-log using "`rpath'/analyse-NEED-EULF-2014-models-`version'-$S_DATE.smcl", replace
-
-* use the pre-processed wide form file which contains all years of consumption data but not the constant values which are in the xwave file
-use "`dpath'/need_eul_may2014_consumptionfile_wide.dta", clear
-
-* we're going to use 2012 data only
-
-keep HH_ID *2012*
-
-* log the consumption as it's very skewed -> becomes semi-normal & OK for linear regression
-* Gcons = gas
-* Econs = Electricity
-* Presumably those without gas use oil or electricity for heating - we don't have oil so we should probably restrict analysis to gas-using hosueholds only to avoid this confounding factor?
-
-* check what's valid
-tab Gcons2012Valid Econs2012Valid, mi // what does G,L,M mean? Presumably O = off gas?
-tabstat Gcons2012, by(Gcons2012Valid) s(mean min max n)
-
-keep if Gcons2012Valid == "V"
-
-gen log_Gcons2012 = log(Gcons2012)
-gen log_Econs2012 = log(Econs2012)
-
-* combine consumption
-* treat missing (gas) as 0
-egen Allcons2012 = rowtotal(Gcons2012 Econs2012)
-
-gen log_Allcons2012 = log(Allcons2012)
-* create log consumption quintiles
-egen quinlog_Allcons2012 = cut(log_Allcons2012), group(5)
-egen quinlog_Gcons2012 = cut(log_Gcons2012), group(5)
-egen quinlog_Econs2012 = cut(log_Econs2012), group(5)
-
-* merge in the pre-processed cross-year fixed values file
-merge 1:1 HH_ID using "`dpath'/need_eul_may2014_xwavefile.dta"
-
-* fix some of the variables
-
-* combine IMD: this is a bit dodgy as they are not strictly comparable
-gen ba_imd = IMD_ENG
-replace ba_imd = IMD_WALES if ba_imd == .
-
-* must use as category variables!!
-* set unkown to be 10 -> adds to end of contrasts so can see effect
-replace LOFT_DEPTH = 10 if LOFT_DEPTH == .
-
-* set unkown to be 2020 -> adds to end of contrasts so can see effect
-replace BOILER_YEAR = 2020 if BOILER_YEAR == .
-replace CWI_YEAR = 2020 if CWI_YEAR == .
-replace LI_YEAR  = 2020 if LI_YEAR  == .
-
-* 0 = no
-destring BOILER, force replace
-replace BOILER = 0 if BOILER == .
-
-* household level vars
-local generic_hvars "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_TYPE i.PROP_AGE i.EE_BAND "
-local generic_hvarsnp "i.BOILER_YEAR i.MAIN_HEAT_FUEL i.LI_YEAR i.LOFT_DEPTH i.FLOOR_AREA_BAND WALL_CONS i.CWI_YEAR i.PROP_AGE i.EE_BAND "
-
-* area level vars
-local generic_rvars "i.ba_region i.ba_imd"
-
-* define different property types
-local ptypes "101 102 103 104 105 106"
-local pt101 "detached"
-local pt102 "semi"
-local pt103 "end_terr"
-local pt104 "mid_terr"
-local pt105 "bung"
-local pt106 "flat"
-
-* now loop over the energy types & run linear regression models
-* NB - the rounding of the consumption values may lead to modelling problems
-
-local vars "Gcons Econs Allcons"
-foreach v of local vars {
-	* check distributions of original consumption values
-	histogram `v'2012, by(MAIN_HEAT_FUEL, total) name(histo_`v')
-	tabstat `v'2012, by(MAIN_HEAT_FUEL) s(n mean min max)
-	
-	* all hhs model
-	qui: regress log_`v'2012 `generic_hvars' ///
-		`generic_rvars' ///
-		i.BOILER_YEAR
-	
-	est store rlog_`v'2012
-	di "* -> `v' estat to test for heteroskedasticity & omitted vars"
-	estat ovtest
-	estat hettest
-	
-	* we ought to be testing for linearity too
-	di "* -> `v' linktest to test for model specification"
-	di "* if p of _hatsq < 0.05 -> mis-spec"
-	di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm"
-	linktest	
-	
-	* models by property type - to see if rsq & coefficients vary
-	foreach p of local ptypes {
-		di "* -> testing log_`v'2012 for `pt`p''"
-		qui: regress log_`v'2012 `generic_hvarsnp' ///
-			`generic_rvars'	///
-			i.BOILER_YEAR ///
-			if PROP_TYPE == `p'
-		est store rlog_`v'2012_`pt`p''
-		
-		di "* -> `v' 2012 `pt`p'' - estat to test for heteroskedasticity & omitted vars"
-		estat ovtest
-		estat hettest
-		
-		* we ought to be testing for linearity too
-		di "* -> `v' `pt`p'' linktest to test for model specification"
-		di "* if p of _hatsq < 0.05 -> mis-spec"
-		di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm"
-		linktest	
-	}
-	* models for different consumption quintiles - to see if rsq & coefficients vary
-	foreach q of numlist 0/4 {
-		di "* -> testing log_`v'2012 for quintile: `q'"
-		qui: regress log_`v'2012 `generic_hvars' ///
-			`generic_rvars'	///
-			i.BOILER_YEAR ///
-			if quinlog_`v'2012 == `q'
-		est store rlog_`v'2012q`q'
-		
-		di "* -> quintile: `q' - estat to test for heteroskedasticity & omitted vars"
-		estat ovtest
-		estat hettest
-		
-		* we ought to be testing for linearity too
-		di "* -> quintile: `q' - linktest"
-		di "* if p of _hatsq < 0.05 -> mis-spec"
-		di "* http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm"
-		linktest	
-	}
-}
-
-* output all the results - that's a lot of t tests!
-* we could put them all out in one file but it would be really hard to find the ones you want!
-
-estout rlog_Gcons2012 using "`rpath'/NEED-EULF-2014-log-gas-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout rlog_Gcons2012q* using "`rpath'/NEED-EULF-2014-log-gas-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout rlog_Gcons2012_* using "`rpath'/NEED-EULF-2014-log-gas-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout rlog_Econs2012 using "`rpath'/NEED-EULF-2014-log-elec-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout rlog_Econs2012q* using "`rpath'/NEED-EULF-2014-log-elec-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout rlog_Econs2012_* using "`rpath'/NEED-EULF-2014-log-elec-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout rlog_Allcons2012 using "`rpath'/NEED-EULF-2014-log-energy-model-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout rlog_Allcons2012q* using "`rpath'/NEED-EULF-2014-log-energy-models-quintiles-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-estout rlog_Allcons2012_* using "`rpath'/NEED-EULF-2014-log-energy-models-by-property-type-`version'-$S_DATE.txt", replace cells("b se p _star") stats(r2 r2_a N ll)
-
-di "* Done!"
-
-log close
diff --git a/NEED/analyse-NEED-EULF-2014-models-v2.0.do b/NEED/analyse-NEED-EULF-2014-models-v2.0.do
index a25b372d33cc0fcc40480e9a021bea959e89e29d..e656ad58b2b0a61674a0a50806c904ae02cefa63 100644
--- a/NEED/analyse-NEED-EULF-2014-models-v2.0.do
+++ b/NEED/analyse-NEED-EULF-2014-models-v2.0.do
@@ -1,17 +1,31 @@
-* Script to analyse DECC's NEED data to:
-* investigate % variance of energy consumption due to dwelling type variables as a way to infer the % of variance due to people
+* Script to analyse DECC's EULF 2014 NEED data to:
+* - investigate % variance of energy consumption due to dwelling type variables as a way to infer the % of variance due to people
 
 * NB this script uses 2 data files derived from the original data using the 'process' script
 
 * Original data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
 * http://discover.ukdataservice.ac.uk/catalogue/?sn=7518
 
-* Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
-* b.anderson@soton.ac.uk
-* (c) University of Southampton
+/*   
 
-* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
-* http://creativecommons.org/licenses/by-nc/4.0/
+Copyright (C) 2014  University of Southampton
+
+Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
+	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License 
+(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+#YMMV - http://en.wiktionary.org/wiki/YMMV
+
+*/
 
 clear all
 
diff --git a/NEED/process-NEED-EULF-2014.do b/NEED/process-NEED-EULF-2014.do
index ede8ae86dc1e27484b0b415d733df99ec3ebd8af..a7c556db142714937b25962252631f950a575c35 100644
--- a/NEED/process-NEED-EULF-2014.do
+++ b/NEED/process-NEED-EULF-2014.do
@@ -14,12 +14,26 @@
 * Gcons<year>valid variable has undefined labels: G, L, M = ? Presumably 0 = off gas & V = valid?
 * ideally DECC should set missing to -99 to aid re-coding and avoid unpleasant surprises in naive analysis!
 
-* Author: Ben Anderson, Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton
-* b.anderson@soton.ac.uk
-* (c) University of Southampton
+/*   
 
-* Unless there is a different license file in the folder in which this script is found, the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license applies
-* http://creativecommons.org/licenses/by-nc/4.0/
+Copyright (C) 2014  University of Southampton
+
+Author: Ben Anderson (b.anderson@soton.ac.uk, @dataknut, https://github.com/dataknut) 
+	[Energy & Climate Change, Faculty of Engineering & Environment, University of Southampton]
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License 
+(http://choosealicense.com/licenses/gpl-2.0/), or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+#YMMV - http://en.wiktionary.org/wiki/YMMV
+
+*/
 
 clear all
 
diff --git a/NEED/results/README.md b/NEED/results/README.md
deleted file mode 100644
index 07bf0caf577b6cb638a52bde4aa5168a3f54b031..0000000000000000000000000000000000000000
--- a/NEED/results/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-DECC-git NEED results
-============
-
-A collection of results from analysis of data from the public versions of DECC's  NEED dataset
-
-Original 'End User License' version of the data available from: UK DATA ARCHIVE: Study Number 7518 - National Energy Efficiency Data-Framework, 2014
-http://discover.ukdataservice.ac.uk/catalogue/?sn=7518
-
-The charts here show r sq values for different 'kichen sink' multivariate regression models. The idea was to see how much of the variation in gas & electricity consumption could be attributed to the 'physical' variables in the datasets and thus that the residual would represent the 'effect' of people's behaviour on energy consumption - see stats code for explanations!
\ No newline at end of file
diff --git a/NEED/results/all2012-model-EULF.png b/NEED/results/all2012-model-EULF.png
deleted file mode 100644
index a0625136b66ba01a50b3bb5f79a35cbc189269bb..0000000000000000000000000000000000000000
Binary files a/NEED/results/all2012-model-EULF.png and /dev/null differ
diff --git a/NEED/results/gas2012-model-EULF.png b/NEED/results/gas2012-model-EULF.png
deleted file mode 100644
index 6471f86de0cbcba2e2228322bc0887c24d0bdc71..0000000000000000000000000000000000000000
Binary files a/NEED/results/gas2012-model-EULF.png and /dev/null differ