diff --git a/NAMESPACE b/NAMESPACE
index bf0d5781aa6f8abab21bdd6f6ac49c23c16bab0f..f43358e6568fb6cf4e6abfb78e2418945104d99e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,6 +15,7 @@ export(Zbeta_Zscore)
 export(Zbeta_expected)
 export(Zbeta_log_rsq_over_expected)
 export(Zbeta_rsq_over_expected)
+export(create_LDprofile)
 importFrom(stats,cor)
 importFrom(stats,na.omit)
 importFrom(stats,pbeta)
diff --git a/R/create_LDprofile.R b/R/create_LDprofile.R
new file mode 100644
index 0000000000000000000000000000000000000000..2a7828666e485e55ee47222e107958ee3265b67c
--- /dev/null
+++ b/R/create_LDprofile.R
@@ -0,0 +1,137 @@
+
+#' Creates an LD profile
+#'
+#' An LD (linkage disequilibrium) profile is a look-up table that tell you the expected correlation between SNPs given the genetic distance between them.
+#'
+#' In the output, bins represent lower bounds. The first bin contains pairs where the genetic distance is greater than or equal to 0 and less than \code{bin_size}. The final bin contains pairs where the genetic distance is greater than or equal to \code{max_dist}-\code{bin_size} and less than \code{max_dist}.
+#' If the \code{max_dist} is not an increment of \code{bin_size}, it will be adjusted to the next highest increment.The maximum bin will be the bin that \code{max_dist} falls into. For example, if the \code{max_dist} is given as 4.5 and the \code{bin_size} is 1, the final bin will be 4.\cr
+#' By default, Beta parameters are not calculated. To calcualte Beta parameters, needed for the \code{\link{Zalpha_BetaCDF}} and \code{\link{Zbeta_BetaCDF}} statistics, \code{beta_params} should be set to TRUE and the package \code{fitdistrplus} must be installed.
+#'
+#' @param dist A numeric vector containing genetic distances.
+#' @param x A matrix of SNP values. Columns represent chromosomes; rows are SNP locations. Hence, the number of rows should equal the length of the \code{dist} vector. SNPs should all be biallelic.
+#' @param bin_size The size of each bin, in the same units as \code{dist}.
+#' @param max_dist Optional. The maximum genetic distance to be considered. If this is not supplied, it will default to the maximum distance in the \code{dist} vector.
+#' @param beta_params Optional. Beta parameters are calculated if this is set to TRUE. Default is FALSE.
+#'
+#' @return A data frame containing an LD profile that can be used by other statistics in this package.
+#' @references Jacobs, G.S., T.J. Sluckin, and T. Kivisild, \emph{Refining the Use of Linkage Disequilibrium as a Robust Signature of Selective Sweeps.} Genetics, 2016. \strong{203}(4): p. 1807
+#' @examples
+#' ## load the snps example dataset
+#' data(snps)
+#' ## Create an LD profile using this data
+#' create_LDprofile(snps$distances,as.matrix(snps[,3:12]),0.001)
+#'
+#' @export
+#' @seealso \code{\link{Zalpha_expected}} \code{\link{Zalpha_rsq_over_expected}} \code{\link{Zalpha_log_rsq_over_expected}} \code{\link{Zalpha_Zscore}} \code{\link{Zalpha_BetaCDF}} \code{\link{Zbeta_expected}} \code{\link{Zbeta_rsq_over_expected}} \code{\link{Zbeta_log_rsq_over_expected}} \code{\link{Zbeta_Zscore}} \code{\link{Zbeta_BetaCDF}} \code{\link{Zalpha_all}}
+#'
+create_LDprofile<-function(dist,x,bin_size,max_dist=NULL,beta_params=FALSE){
+  #Checks
+  #Check dist is vector
+  if (is.numeric(dist) ==FALSE || is.vector(dist)==FALSE){
+    stop("dist must be a numeric vector")
+  }
+  #Check x is a matrix
+  if (is.matrix(x)==FALSE){
+    stop("x must be a matrix")
+  }
+  #Check x has rows equal to the length of dist
+  if (length(dist) != nrow(x)){
+    stop("The number of rows in x must equal the number of SNP genetic distances given in dist")
+  }
+  #Check SNPs are all biallelic
+  if (sum(apply(x,1,function(x){length(na.omit(unique(x)))}) != 2)>0){
+    stop("SNPs must all be biallelic")
+  }
+  #Change matrix x to numeric if it isn't already
+  if (is.numeric(x)==FALSE){
+    x<-matrix(as.numeric(factor(x)),nrow=dim(x)[1])
+  }
+  #Check bin_size is a number
+  if (is.numeric(bin_size) ==FALSE || bin_size <= 0){
+    stop("bin_size must be a number greater than 0")
+  }
+  #Check max_dist is a number or NULL
+  if (is.null(max_dist)==FALSE){
+    if (is.numeric(max_dist) ==FALSE || max_dist <= 0){
+      stop("max_dist must be a number greater than 0")
+    }
+  } else {
+    #Set max_dist to the maximum distance in the data if it was not supplied
+    max_dist<-dist[length(dist)]-dist[1]
+  }
+  #Adjusts the Max_dist value so it is equal to an increment of bin_size if it isn't already
+  if(!isTRUE(all.equal(max_dist,assign_bins(bin_size,max_dist)))){
+    max_dist<-assign_bins(bin_size,max_dist)+bin_size
+  }
+  #Check beta_params is logical
+  if (is.logical(beta_params)==FALSE){
+    stop("beta_params must be TRUE or FALSE")
+  }
+  #If beta_params is TRUE, check for fitdistrplus package
+  if (beta_params==TRUE){
+    if (requireNamespace("fitdistrplus", quietly = TRUE)==FALSE) {
+      stop("Package \"fitdistrplus\" needed for Beta parameters to be calculated. Please install it.")
+    }
+  }
+
+  #Find the differences in genetic distances between pairs of SNPs
+  diffs<-lower_triangle(outer(dist,dist,"-"))
+
+  #Find the rsquared value between pairs of SNPs
+  rsq<-lower_triangle(cor(t(x))^2)
+
+  #Filter for just those less than the max genetic distance
+  rsq<-rsq[diffs<max_dist]
+  diffs<-diffs[diffs<max_dist]
+
+  #Assign diffs to bins
+  bins<-assign_bins(bin_size,diffs)
+
+  #Create LDprofile data frame
+  LDprofile<-data.frame(bin=seq(0,max_dist-bin_size,bin_size),rsq=NA,sd=NA,Beta_a=NA,Beta_b=NA,n=NA)
+
+  print.default("test")
+
+  #Loop for each bin (i)
+  for (i in 1:nrow(LDprofile)){
+    LDprofile$n[i]<-sum(bins==LDprofile$bin[i])
+    #If there is at least one pair whose egentic distance falls within the bin, calculate stats
+    if (LDprofile$n[i]>0){
+      #Get the rsquared values for all pairs in this bin
+      temprsq<-rsq[bins==LDprofile$bin[i]]
+      #Calculate the mean
+      LDprofile$rsq[i]<-mean(temprsq)
+      #Calculate the standard deviation
+      LDprofile$sd[i]<-sd(temprsq)
+
+      #Calculate Beta distribution parameters if required
+      #Do not calculate for bins containing less than two pairs or the standatd deviation is zero
+      if (beta_params==TRUE & LDprofile$n[i]>1 & LDprofile$sd[i]>0){
+        if (sum(temprsq==1 | temprsq==0)>0){
+          #If there are any 0s or 1s adjust the data
+          temprsq<-(temprsq*(length(temprsq)-1)+0.5)/length(temprsq)
+        }
+        #Try to fit the data to a Beta distribution
+        betafit<-try(fitdistrplus::fitdist(temprsq,"beta"))
+        if (class(betafit) != "try-error"){
+          LDprofile$Beta_a[i]<-betafit$estimate[1]
+          LDprofile$Beta_b[i]<-betafit$estimate[2]
+        } else {
+          #If failed to fit, try again using estimated beta parameters to initialise
+          print.default("errored")
+          startBetaParams<-est_Beta_Params(LDprofile$rsq[i], LDprofile$sd[i]^2)
+          betafit<-try(fitdistrplus::fitdist(temprsq,"beta",start=list(shape1=startBetaParams$alpha, shape2=startBetaParams$beta)))
+          if (class(betafit) != "try-error"){
+            LDprofile$Beta_a[i]<-betafit$estimate[1]
+            LDprofile$Beta_b[i]<-betafit$estimate[2]
+          } else {
+            #If Beta parameters cannot be fitted, return NA
+            LDprofile$Beta_a[i]<-NA
+            LDprofile$Beta_b[i]<-NA
+          }
+        }
+      }
+    }
+  }
+  return(LDprofile)
+}
diff --git a/man/create_LDprofile.Rd b/man/create_LDprofile.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..95c51f28f1278f70c9bb35b517b0123de0ecda60
--- /dev/null
+++ b/man/create_LDprofile.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/create_LDprofile.R
+\name{create_LDprofile}
+\alias{create_LDprofile}
+\title{Creates an LD profile}
+\usage{
+create_LDprofile(dist, x, bin_size, max_dist = NULL, beta_params = FALSE)
+}
+\arguments{
+\item{dist}{A numeric vector containing genetic distances.}
+
+\item{x}{A matrix of SNP values. Columns represent chromosomes; rows are SNP locations. Hence, the number of rows should equal the length of the \code{dist} vector. SNPs should all be biallelic.}
+
+\item{bin_size}{The size of each bin, in the same units as \code{dist}.}
+
+\item{max_dist}{Optional. The maximum genetic distance to be considered. If this is not supplied, it will default to the maximum distance in the \code{dist} vector.}
+
+\item{beta_params}{Optional. Beta parameters are calculated if this is set to TRUE. Default is FALSE.}
+}
+\value{
+A data frame containing an LD profile that can be used by other statistics in this package.
+}
+\description{
+An LD (linkage disequilibrium) profile is a look-up table that tell you the expected correlation between SNPs given the genetic distance between them.
+}
+\details{
+In the output, bins represent lower bounds. The first bin contains pairs where the genetic distance is greater than or equal to 0 and less than \code{bin_size}. The final bin contains pairs where the genetic distance is greater than or equal to \code{max_dist}-\code{bin_size} and less than \code{max_dist}.
+If the \code{max_dist} is not an increment of \code{bin_size}, it will be adjusted to the next highest increment.The maximum bin will be the bin that \code{max_dist} falls into. For example, if the \code{max_dist} is given as 4.5 and the \code{bin_size} is 1, the final bin will be 4.\cr
+By default, Beta parameters are not calculated. To calcualte Beta parameters, needed for the \code{\link{Zalpha_BetaCDF}} and \code{\link{Zbeta_BetaCDF}} statistics, \code{beta_params} should be set to TRUE and the package \code{fitdistrplus} must be installed.
+}
+\examples{
+## load the snps example dataset
+data(snps)
+## Create an LD profile using this data
+create_LDprofile(snps$distances,as.matrix(snps[,3:12]),0.001)
+
+}
+\references{
+Jacobs, G.S., T.J. Sluckin, and T. Kivisild, \emph{Refining the Use of Linkage Disequilibrium as a Robust Signature of Selective Sweeps.} Genetics, 2016. \strong{203}(4): p. 1807
+}
+\seealso{
+\code{\link{Zalpha_expected}} \code{\link{Zalpha_rsq_over_expected}} \code{\link{Zalpha_log_rsq_over_expected}} \code{\link{Zalpha_Zscore}} \code{\link{Zalpha_BetaCDF}} \code{\link{Zbeta_expected}} \code{\link{Zbeta_rsq_over_expected}} \code{\link{Zbeta_log_rsq_over_expected}} \code{\link{Zbeta_Zscore}} \code{\link{Zbeta_BetaCDF}} \code{\link{Zalpha_all}}
+}
diff --git a/tests/testthat/test-create_LDprofile.R b/tests/testthat/test-create_LDprofile.R
new file mode 100644
index 0000000000000000000000000000000000000000..3b74545e689d2a4994dc984466f74ea76b62c185
--- /dev/null
+++ b/tests/testthat/test-create_LDprofile.R
@@ -0,0 +1,209 @@
+df<-data.frame(
+  SNP=c("SNP1","SNP2","SNP3","SNP4","SNP5","SNP6","SNP7","SNP8","SNP9","SNP10","SNP11","SNP12","SNP13","SNP14","SNP15"),
+  POS=c(100,200,300,400,500,600,700,800,900,1000,1100,1200,1300,1400,1500),
+  C1=c(1,1,2,1,2,1,1,2,1,2,1,2,1,1,1),
+  C2=c(2,2,1,2,1,2,1,2,2,2,1,2,1,1,2),
+  C3=c(2,1,2,2,2,1,1,2,2,1,2,2,1,1,2),
+  C4=c(1,1,2,1,2,2,1,1,1,1,1,2,2,2,2),
+  C5=c(1,1,2,1,2,1,2,1,1,1,1,1,2,1,1),
+  dist=c(0,0.00101,0.00123,0.00207,0.00218,0.00223,0.00235,0.00251,0.00272,0.00289,0.00304,0.00316,0.00335,0.00345,0.00374)
+)
+dist = df$dist
+x = as.matrix(df[,3:7])
+bin_size = 0.001
+max_dist = 0.005
+beta_params = TRUE
+
+## test that everything is calculated correctly given all parameters
+
+test_that("create_LDprofile calculates the LD profile correctly", {
+
+  expect_equal(create_LDprofile(dist = df$dist, x = as.matrix(df[,3:7]), bin_size = 0.001, max_dist = 0.005, beta_params = TRUE),
+               data.frame(
+                 bin=c(0,0.001,0.002,0.003,0.004),
+                 rsq=c(0.285622427983539,0.280913978494624,0.263888888888889,0.319444444444444,NA),
+                 sd=c(0.270862044573862,0.201905775929377,0.321786617161322,0.142318760638328,NA),
+                 Beta_a=c(0.619957744381906,1.125028692019340,0.635410044952769,3.941019442363900,NA),
+                 Beta_b=c(1.062459890834270,2.446706389704430,1.149319432462400,8.454825333760550,NA),
+                 n=c(54,31,15,5,0)
+               ))
+})
+
+## Test the function with a different max_dist
+
+test_that("create_LDprofile calculates the LD profile correctly with a different max_dist", {
+
+  expect_equal(create_LDprofile(dist = df$dist, x = as.matrix(df[,3:7]), bin_size = 0.001, max_dist = 0.003, beta_params = TRUE),
+               data.frame(
+                 bin=c(0,0.001,0.002),
+                 rsq=c(0.285622427983539,0.280913978494624,0.263888888888889),
+                 sd=c(0.270862044573862,0.201905775929377,0.321786617161322),
+                 Beta_a=c(0.619957744381906,1.125028692019340,0.635410044952769),
+                 Beta_b=c(1.062459890834270,2.446706389704430,1.149319432462400),
+                 n=c(54,31,15)
+               ))
+})
+## Test the function with no max_dist given
+
+test_that("create_LDprofile calculates the LD profile correctly with no max_dist supplied", {
+
+  expect_equal(create_LDprofile(dist = df$dist, x = as.matrix(df[,3:7]), bin_size = 0.001, beta_params = TRUE),
+               data.frame(
+                 bin=c(0,0.001,0.002,0.003),
+                 rsq=c(0.285622427983539,0.280913978494624,0.263888888888889,0.319444444444444),
+                 sd=c(0.270862044573862,0.201905775929377,0.321786617161322,0.142318760638328),
+                 Beta_a=c(0.619957744381906,1.125028692019340,0.635410044952769,3.941019442363900),
+                 Beta_b=c(1.062459890834270,2.446706389704430,1.149319432462400,8.454825333760550),
+                 n=c(54,31,15,5)
+               ))
+})
+
+
+## Test the function with a different bin_size
+
+test_that("create_LDprofile calculates the LD profile correctly with a different bin size", {
+
+  expect_equal(create_LDprofile(dist = df$dist, x = as.matrix(df[,3:7]), bin_size = 0.0005, beta_params = TRUE),
+               data.frame(
+                 bin=c(0,0.0005,0.001,0.0015,0.002,0.0025,0.003,0.0035),
+                 rsq=c(0.238505747126437,0.340277777777778,0.283459595959596,0.274691358024691,0.215277777777778,0.361111111111111,0.288194444444444,0.444444444444445),
+                 sd=c(0.211600341827602,0.322468326753589,0.220527561550113,0.158590157477369,0.293808275018141,0.387895557,0.14316339,NA),
+                 Beta_a=c(0.916070145958307,0.637072700079744,1.046576044485340,1.909812912830260,0.775059123115346,1.088198634018290,3.789877096116000,NA),
+                 Beta_b=c(2.326350552394540,0.872215477086822,2.166981335251990,5.166454170350760,1.748740564135290,1.488374161884570,9.367197007381050,NA),
+                 n=c(29,25,22,9,10,5,4,1)
+               ))
+})
+
+## Test the function with beta_params not specified
+
+test_that("create_LDprofile calculates the LD profile correctly with beta_params not specified", {
+
+  expect_equal(create_LDprofile(dist = df$dist, x = as.matrix(df[,3:7]), bin_size = 0.001, max_dist = 0.005),
+               data.frame(
+                 bin=c(0,0.001,0.002,0.003,0.004),
+                 rsq=c(0.285622427983539,0.280913978494624,0.263888888888889,0.319444444444444,NA),
+                 sd=c(0.270862044573862,0.201905775929377,0.321786617161322,0.142318760638328,NA),
+                 Beta_a=c(NA,NA,NA,NA,NA),
+                 Beta_b=c(NA,NA,NA,NA,NA),
+                 n=c(54,31,15,5,0)
+               ))
+})
+
+## Test the function with a character matrix as x
+
+df1<-df
+df1[df1==1]<-"A"
+df1[df1==2]<-"B"
+test_that("create_LDprofile calculates the LD profile correctly with character matrix", {
+
+  expect_equal(create_LDprofile(dist = df1$dist, x = as.matrix(df1[,3:7]), bin_size = 0.001, max_dist = 0.005, beta_params = TRUE),
+               data.frame(
+                 bin=c(0,0.001,0.002,0.003,0.004),
+                 rsq=c(0.285622427983539,0.280913978494624,0.263888888888889,0.319444444444444,NA),
+                 sd=c(0.270862044573862,0.201905775929377,0.321786617161322,0.142318760638328,NA),
+                 Beta_a=c(0.619957744381906,1.125028692019340,0.635410044952769,3.941019442363900,NA),
+                 Beta_b=c(1.062459890834270,2.446706389704430,1.149319432462400,8.454825333760550,NA),
+                 n=c(54,31,15,5,0)
+               ))
+})
+
+## Test all the checks
+
+## Test the function with dists non-numeric
+
+test_that("create_LDprofile fails when dist is non-numeric", {
+
+  expect_error(create_LDprofile(dist = paste0(df$dist,"dist"), x = as.matrix(df[,3:7]), bin_size = 0.001, max_dist = 0.005, beta_params = TRUE),
+               "dist must be a numeric vector")
+})
+
+## Test the function with dists not a matrix
+
+test_that("create_LDprofile fails when dist is not a matrix", {
+
+  expect_error(create_LDprofile(dist = df, x = as.matrix(df[,3:7]), bin_size = 0.001, max_dist = 0.005, beta_params = TRUE),
+               "dist must be a numeric vector")
+})
+
+
+## Test the function with x not a matrix
+
+test_that("create_LDprofile fails when x is not a matrix", {
+
+  expect_error(create_LDprofile(dist = df$dist, x = df[,3:7], bin_size = 0.001, max_dist = 0.005, beta_params = TRUE),
+               "x must be a matrix")
+})
+
+## Test the function with x not having the correct amount of rows
+
+test_that("create_LDprofile fails when the number of rows in x is not equal to the length of pos", {
+
+  expect_error(create_LDprofile(dist = df$dist, x = t(as.matrix(df[,3:7])), bin_size = 0.001, max_dist = 0.005, beta_params = TRUE),
+               "The number of rows in x must equal the number of SNP genetic distances given in dist")
+})
+
+## Test the function with a SNP having only one allele
+
+test_that("create_LDprofile fails when a SNP has only one allele", {
+
+  df1<-df
+  df1[1,3:7]<-1
+  expect_error(create_LDprofile(dist = df1$dist, x = as.matrix(df1[,3:7]), bin_size = 0.001, max_dist = 0.005, beta_params = TRUE),
+               "SNPs must all be biallelic")
+})
+
+## Test the function with a SNP having more than two alleles
+
+test_that("create_LDprofile fails when a SNP has more than two alleles", {
+
+  df1<-df
+  df1[1,7]<-3
+  expect_error(create_LDprofile(dist = df1$dist, x = as.matrix(df1[,3:7]), bin_size = 0.001, max_dist = 0.005, beta_params = TRUE),
+               "SNPs must all be biallelic")
+})
+
+## Test the function with bin_size as non-numeric
+
+test_that("create_LDprofile fails when bin_size is non-numeric", {
+
+  expect_error(create_LDprofile(dist = df$dist, x = as.matrix(df[,3:7]), bin_size = "0.001cM", max_dist = 0.005, beta_params = TRUE),
+               "bin_size must be a number greater than 0")
+})
+
+## Test the function with bin_size as negative
+
+test_that("create_LDprofile fails when bin_size is negative", {
+
+  expect_error(create_LDprofile(dist = df$dist, x = as.matrix(df[,3:7]), bin_size = -1, max_dist = 0.005, beta_params = TRUE),
+               "bin_size must be a number greater than 0")
+})
+
+## Test the function with max_dist as non-numeric
+
+test_that("create_LDprofile fails when max_dist is non-numeric", {
+
+  expect_error(create_LDprofile(dist = df$dist, x = as.matrix(df[,3:7]), bin_size = 0.001, max_dist = "0.005cM", beta_params = TRUE),
+               "max_dist must be a number greater than 0")
+})
+
+## Test the function with max_dist as negative
+
+test_that("create_LDprofile fails when max_dist is negative", {
+
+  expect_error(create_LDprofile(dist = df$dist, x = as.matrix(df[,3:7]), bin_size = 0.001, max_dist = -1, beta_params = TRUE),
+               "max_dist must be a number greater than 0")
+})
+
+## Test the function with beta_params not logical
+
+test_that("create_LDprofile fails when beta_params is not logical", {
+
+  expect_error(create_LDprofile(dist = df$dist, x = as.matrix(df[,3:7]), bin_size = 0.001, max_dist = 0.005, beta_params = 1),
+               "beta_params must be TRUE or FALSE")
+})
+
+## Test the function with missing values
+
+## Test the function with fitdistrplus package not loaded and beta_params = TRUE
+
+## Test the function when beta estimation doesn't work the first try