From ddefe8ab864567ed2726f27b63877c28350ecbf6 Mon Sep 17 00:00:00 2001 From: Clare <chorscroft@users.noreply.github.com> Date: Tue, 15 Sep 2020 09:02:13 +0100 Subject: [PATCH] Added code to force pairs of SNPs with a genetic distance greater than the biggest bin in the LD profile into the maximum bin --- R/Zalpha_BetaCDF.R | 2 ++ R/Zalpha_Zscore.R | 2 ++ R/Zalpha_all.R | 3 +++ R/Zalpha_expected.R | 2 ++ R/Zalpha_log_rsq_over_expected.R | 2 ++ R/Zalpha_rsq_over_expected.R | 2 ++ R/Zbeta_BetaCDF.R | 1 + R/Zbeta_Zscore.R | 1 + R/Zbeta_expected.R | 1 + R/Zbeta_log_rsq_over_expected.R | 1 + R/Zbeta_rsq_over_expected.R | 1 + 11 files changed, 18 insertions(+) diff --git a/R/Zalpha_BetaCDF.R b/R/Zalpha_BetaCDF.R index 640b13a..2368cef 100644 --- a/R/Zalpha_BetaCDF.R +++ b/R/Zalpha_BetaCDF.R @@ -161,11 +161,13 @@ Zalpha_BetaCDF<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_Beta_a, LDp ##Left # Find distances between each SNP in L and round to bin size bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2) LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_Beta_a,LDprofile_Beta_b),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) LrsqSum<-sum(pbeta(LrsqExp$Lrsq,LrsqExp$LDprofile_Beta_a,LrsqExp$LDprofile_Beta_b)) ##Right bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2) RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_Beta_a,LDprofile_Beta_b),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) RrsqSum<-sum(pbeta(RrsqExp$Rrsq,RrsqExp$LDprofile_Beta_a,RrsqExp$LDprofile_Beta_b)) diff --git a/R/Zalpha_Zscore.R b/R/Zalpha_Zscore.R index d7f25e9..4cc3183 100644 --- a/R/Zalpha_Zscore.R +++ b/R/Zalpha_Zscore.R @@ -164,11 +164,13 @@ Zalpha_Zscore<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_rsq, LDprofi ##Left # Find distances between each SNP in L and round to bin size bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2) LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq,LDprofile_sd),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) LrsqSum<-sum((LrsqExp$Lrsq-LrsqExp$LDprofile_rsq)/LrsqExp$LDprofile_sd) ##Right bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2) RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq,LDprofile_sd),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) RrsqSum<-sum((RrsqExp$Rrsq-RrsqExp$LDprofile_rsq)/RrsqExp$LDprofile_sd) diff --git a/R/Zalpha_all.R b/R/Zalpha_all.R index 35e9991..38f9a86 100644 --- a/R/Zalpha_all.R +++ b/R/Zalpha_all.R @@ -235,12 +235,15 @@ Zalpha_all <- function(pos, ws, x=NULL, dist=NULL, LDprofile_bins=NULL, LDprofil if (is.null(dist)==FALSE & is.null(LDprofile_bins)==FALSE & is.null(LDprofile_rsq)==FALSE){ #Left bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) #Right bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) #Over bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) outputList$Zalpha_expected[i]<-(sum(LrsqExp$LDprofile_rsq)/choose(noL,2)+sum(RrsqExp$LDprofile_rsq)/choose(noR,2))/2 diff --git a/R/Zalpha_expected.R b/R/Zalpha_expected.R index e863366..ae5ce5b 100644 --- a/R/Zalpha_expected.R +++ b/R/Zalpha_expected.R @@ -132,9 +132,11 @@ Zalpha_expected<-function(pos, ws, dist, LDprofile_bins, LDprofile_rsq, minRandL ##Left # Find distances between each SNP in L and round to bin size bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) LrsqSum<-sum(merge(data.frame(bins=as.character(bins)),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE)[,2]) ##Right bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) RrsqSum<-sum(merge(data.frame(bins=as.character(bins)),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE)[,2]) outputList$Zalpha_expected[i]<-(LrsqSum/choose(noL,2)+RrsqSum/choose(noR,2))/2 diff --git a/R/Zalpha_log_rsq_over_expected.R b/R/Zalpha_log_rsq_over_expected.R index bde2f5c..bbff027 100644 --- a/R/Zalpha_log_rsq_over_expected.R +++ b/R/Zalpha_log_rsq_over_expected.R @@ -155,12 +155,14 @@ Zalpha_log_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofi ##Left # Find distances between each SNP in L and round to bin size bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2) Lrsq[Lrsq==0]<-min(Lrsq[Lrsq>0]) #removes zeros by replacing with lowest correlation greater than zero LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) LrsqSum<-sum(log10(LrsqExp$Lrsq/LrsqExp$LDprofile_rsq)) ##Right bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2) Rrsq[Rrsq==0]<-min(Rrsq[Rrsq>0]) #removes zeros by replacing with lowest correlation greater than zero RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) diff --git a/R/Zalpha_rsq_over_expected.R b/R/Zalpha_rsq_over_expected.R index 56672a3..3eb9c54 100644 --- a/R/Zalpha_rsq_over_expected.R +++ b/R/Zalpha_rsq_over_expected.R @@ -155,11 +155,13 @@ Zalpha_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_r ##Left # Find distances between each SNP in L and round to bin size bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2) LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) LrsqSum<-sum(LrsqExp$Lrsq/LrsqExp$LDprofile_rsq) ##Right bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2) RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) RrsqSum<-sum(RrsqExp$Rrsq/RrsqExp$LDprofile_rsq) diff --git a/R/Zbeta_BetaCDF.R b/R/Zbeta_BetaCDF.R index c378482..7f58f07 100644 --- a/R/Zbeta_BetaCDF.R +++ b/R/Zbeta_BetaCDF.R @@ -161,6 +161,7 @@ Zbeta_BetaCDF<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_Beta_a, LDpr # Find distances between each SNP in L and round to bin size bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)])) rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_Beta_a,LDprofile_Beta_b),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) rsqSum<-sum(pbeta(rsqExp$rsq,rsqExp$LDprofile_Beta_a,rsqExp$LDprofile_Beta_b)) diff --git a/R/Zbeta_Zscore.R b/R/Zbeta_Zscore.R index e2795dd..41338cf 100644 --- a/R/Zbeta_Zscore.R +++ b/R/Zbeta_Zscore.R @@ -163,6 +163,7 @@ Zbeta_Zscore<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_rsq, LDprofil } else { # Find distances between each SNP in L and round to bin size bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)])) rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq,LDprofile_sd),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) rsqSum<-sum((rsqExp$rsq-rsqExp$LDprofile_rsq)/rsqExp$LDprofile_sd) diff --git a/R/Zbeta_expected.R b/R/Zbeta_expected.R index 76776da..064b167 100644 --- a/R/Zbeta_expected.R +++ b/R/Zbeta_expected.R @@ -132,6 +132,7 @@ Zbeta_expected<-function(pos, ws, dist, LDprofile_bins, LDprofile_rsq, minRandL # Find the distances between each SNP in the over region and round to bin size bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) rsqSum<-sum(merge(data.frame(bins=as.character(bins)),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE)[,2]) outputList$Zbeta_expected[i]<-rsqSum/(noL*noR) diff --git a/R/Zbeta_log_rsq_over_expected.R b/R/Zbeta_log_rsq_over_expected.R index d5640bb..928e9d2 100644 --- a/R/Zbeta_log_rsq_over_expected.R +++ b/R/Zbeta_log_rsq_over_expected.R @@ -155,6 +155,7 @@ Zbeta_log_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofil # Find distances between each SNP in L and round to bin size bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)])) rsq[rsq==0]<-min(rsq[rsq>0]) #removes zeros by replacing with lowest correlation greater than zero rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) diff --git a/R/Zbeta_rsq_over_expected.R b/R/Zbeta_rsq_over_expected.R index e0cc7fe..0c01611 100644 --- a/R/Zbeta_rsq_over_expected.R +++ b/R/Zbeta_rsq_over_expected.R @@ -155,6 +155,7 @@ Zbeta_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_rs # Find distances between each SNP in L and round to bin size bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size) + bins[bins>max(LDprofile_bins)]<-max(LDprofile_bins) rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)])) rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) rsqSum<-sum(rsqExp$rsq/rsqExp$LDprofile_rsq) -- GitLab