From ddefe8ab864567ed2726f27b63877c28350ecbf6 Mon Sep 17 00:00:00 2001
From: Clare <chorscroft@users.noreply.github.com>
Date: Tue, 15 Sep 2020 09:02:13 +0100
Subject: [PATCH] Added code to force pairs of SNPs with a genetic distance
 greater than the biggest bin in the LD profile into the maximum bin

---
 R/Zalpha_BetaCDF.R               | 2 ++
 R/Zalpha_Zscore.R                | 2 ++
 R/Zalpha_all.R                   | 3 +++
 R/Zalpha_expected.R              | 2 ++
 R/Zalpha_log_rsq_over_expected.R | 2 ++
 R/Zalpha_rsq_over_expected.R     | 2 ++
 R/Zbeta_BetaCDF.R                | 1 +
 R/Zbeta_Zscore.R                 | 1 +
 R/Zbeta_expected.R               | 1 +
 R/Zbeta_log_rsq_over_expected.R  | 1 +
 R/Zbeta_rsq_over_expected.R      | 1 +
 11 files changed, 18 insertions(+)

diff --git a/R/Zalpha_BetaCDF.R b/R/Zalpha_BetaCDF.R
index 640b13a..2368cef 100644
--- a/R/Zalpha_BetaCDF.R
+++ b/R/Zalpha_BetaCDF.R
@@ -161,11 +161,13 @@ Zalpha_BetaCDF<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_Beta_a, LDp
       ##Left
       # Find distances between each SNP in L and round to bin size
       bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2)
       LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_Beta_a,LDprofile_Beta_b),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
       LrsqSum<-sum(pbeta(LrsqExp$Lrsq,LrsqExp$LDprofile_Beta_a,LrsqExp$LDprofile_Beta_b))
       ##Right
       bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2)
       RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_Beta_a,LDprofile_Beta_b),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
       RrsqSum<-sum(pbeta(RrsqExp$Rrsq,RrsqExp$LDprofile_Beta_a,RrsqExp$LDprofile_Beta_b))
diff --git a/R/Zalpha_Zscore.R b/R/Zalpha_Zscore.R
index d7f25e9..4cc3183 100644
--- a/R/Zalpha_Zscore.R
+++ b/R/Zalpha_Zscore.R
@@ -164,11 +164,13 @@ Zalpha_Zscore<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_rsq, LDprofi
       ##Left
       # Find distances between each SNP in L and round to bin size
       bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2)
       LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq,LDprofile_sd),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
       LrsqSum<-sum((LrsqExp$Lrsq-LrsqExp$LDprofile_rsq)/LrsqExp$LDprofile_sd)
       ##Right
       bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2)
       RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq,LDprofile_sd),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
       RrsqSum<-sum((RrsqExp$Rrsq-RrsqExp$LDprofile_rsq)/RrsqExp$LDprofile_sd)
diff --git a/R/Zalpha_all.R b/R/Zalpha_all.R
index 35e9991..38f9a86 100644
--- a/R/Zalpha_all.R
+++ b/R/Zalpha_all.R
@@ -235,12 +235,15 @@ Zalpha_all <- function(pos, ws, x=NULL, dist=NULL, LDprofile_bins=NULL, LDprofil
       if (is.null(dist)==FALSE & is.null(LDprofile_bins)==FALSE & is.null(LDprofile_rsq)==FALSE){
         #Left
         bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size)
+        bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
         LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
         #Right
         bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size)
+        bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
         RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
         #Over
         bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size)
+        bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
         rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
 
         outputList$Zalpha_expected[i]<-(sum(LrsqExp$LDprofile_rsq)/choose(noL,2)+sum(RrsqExp$LDprofile_rsq)/choose(noR,2))/2
diff --git a/R/Zalpha_expected.R b/R/Zalpha_expected.R
index e863366..ae5ce5b 100644
--- a/R/Zalpha_expected.R
+++ b/R/Zalpha_expected.R
@@ -132,9 +132,11 @@ Zalpha_expected<-function(pos, ws, dist, LDprofile_bins, LDprofile_rsq, minRandL
       ##Left
       # Find distances between each SNP in L and round to bin size
       bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       LrsqSum<-sum(merge(data.frame(bins=as.character(bins)),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE)[,2])
       ##Right
       bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       RrsqSum<-sum(merge(data.frame(bins=as.character(bins)),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE)[,2])
 
       outputList$Zalpha_expected[i]<-(LrsqSum/choose(noL,2)+RrsqSum/choose(noR,2))/2
diff --git a/R/Zalpha_log_rsq_over_expected.R b/R/Zalpha_log_rsq_over_expected.R
index bde2f5c..bbff027 100644
--- a/R/Zalpha_log_rsq_over_expected.R
+++ b/R/Zalpha_log_rsq_over_expected.R
@@ -155,12 +155,14 @@ Zalpha_log_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofi
       ##Left
       # Find distances between each SNP in L and round to bin size
       bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2)
       Lrsq[Lrsq==0]<-min(Lrsq[Lrsq>0])  #removes zeros by replacing with lowest correlation greater than zero
       LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
       LrsqSum<-sum(log10(LrsqExp$Lrsq/LrsqExp$LDprofile_rsq))
       ##Right
       bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2)
       Rrsq[Rrsq==0]<-min(Rrsq[Rrsq>0])  #removes zeros by replacing with lowest correlation greater than zero
       RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
diff --git a/R/Zalpha_rsq_over_expected.R b/R/Zalpha_rsq_over_expected.R
index 56672a3..3eb9c54 100644
--- a/R/Zalpha_rsq_over_expected.R
+++ b/R/Zalpha_rsq_over_expected.R
@@ -155,11 +155,13 @@ Zalpha_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_r
       ##Left
       # Find distances between each SNP in L and round to bin size
       bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2)
       LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
       LrsqSum<-sum(LrsqExp$Lrsq/LrsqExp$LDprofile_rsq)
       ##Right
       bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2)
       RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
       RrsqSum<-sum(RrsqExp$Rrsq/RrsqExp$LDprofile_rsq)
diff --git a/R/Zbeta_BetaCDF.R b/R/Zbeta_BetaCDF.R
index c378482..7f58f07 100644
--- a/R/Zbeta_BetaCDF.R
+++ b/R/Zbeta_BetaCDF.R
@@ -161,6 +161,7 @@ Zbeta_BetaCDF<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_Beta_a, LDpr
 
       # Find distances between each SNP in L and round to bin size
       bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)]))
       rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_Beta_a,LDprofile_Beta_b),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
       rsqSum<-sum(pbeta(rsqExp$rsq,rsqExp$LDprofile_Beta_a,rsqExp$LDprofile_Beta_b))
diff --git a/R/Zbeta_Zscore.R b/R/Zbeta_Zscore.R
index e2795dd..41338cf 100644
--- a/R/Zbeta_Zscore.R
+++ b/R/Zbeta_Zscore.R
@@ -163,6 +163,7 @@ Zbeta_Zscore<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_rsq, LDprofil
     } else {
       # Find distances between each SNP in L and round to bin size
       bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)]))
       rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq,LDprofile_sd),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
       rsqSum<-sum((rsqExp$rsq-rsqExp$LDprofile_rsq)/rsqExp$LDprofile_sd)
diff --git a/R/Zbeta_expected.R b/R/Zbeta_expected.R
index 76776da..064b167 100644
--- a/R/Zbeta_expected.R
+++ b/R/Zbeta_expected.R
@@ -132,6 +132,7 @@ Zbeta_expected<-function(pos, ws, dist, LDprofile_bins, LDprofile_rsq, minRandL
 
       # Find the distances between each SNP in the over region and round to bin size
       bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       rsqSum<-sum(merge(data.frame(bins=as.character(bins)),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE)[,2])
       outputList$Zbeta_expected[i]<-rsqSum/(noL*noR)
 
diff --git a/R/Zbeta_log_rsq_over_expected.R b/R/Zbeta_log_rsq_over_expected.R
index d5640bb..928e9d2 100644
--- a/R/Zbeta_log_rsq_over_expected.R
+++ b/R/Zbeta_log_rsq_over_expected.R
@@ -155,6 +155,7 @@ Zbeta_log_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofil
 
       # Find distances between each SNP in L and round to bin size
       bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)]))
       rsq[rsq==0]<-min(rsq[rsq>0])  #removes zeros by replacing with lowest correlation greater than zero
       rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
diff --git a/R/Zbeta_rsq_over_expected.R b/R/Zbeta_rsq_over_expected.R
index e0cc7fe..0c01611 100644
--- a/R/Zbeta_rsq_over_expected.R
+++ b/R/Zbeta_rsq_over_expected.R
@@ -155,6 +155,7 @@ Zbeta_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_rs
 
       # Find distances between each SNP in L and round to bin size
       bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size)
+      bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins)
       rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)]))
       rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE)
       rsqSum<-sum(rsqExp$rsq/rsqExp$LDprofile_rsq)
-- 
GitLab