From 69c7880b452fe5d6ed6657b3157cead09de5c549 Mon Sep 17 00:00:00 2001 From: Clare <chorscroft@users.noreply.github.com> Date: Wed, 11 Mar 2020 11:19:37 +0000 Subject: [PATCH] Added use="pairwise.complete.obs" to cor functions to allow for missing data --- R/Zalpha_BetaCDF.R | 4 ++-- R/Zalpha_Zscore.R | 4 ++-- R/Zalpha_all.R | 6 +++--- R/Zalpha_log_rsq_over_expected.R | 4 ++-- R/Zalpha_rsq_over_expected.R | 4 ++-- R/Zbeta.R | 2 +- R/Zbeta_BetaCDF.R | 2 +- R/Zbeta_Zscore.R | 2 +- R/Zbeta_log_rsq_over_expected.R | 2 +- R/Zbeta_rsq_over_expected.R | 2 +- R/zalpha.R | 4 ++-- 11 files changed, 18 insertions(+), 18 deletions(-) diff --git a/R/Zalpha_BetaCDF.R b/R/Zalpha_BetaCDF.R index 6dd06dd..69cf1b7 100644 --- a/R/Zalpha_BetaCDF.R +++ b/R/Zalpha_BetaCDF.R @@ -159,12 +159,12 @@ Zalpha_BetaCDF<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_Beta_a, LDp ##Left # Find distances between each SNP in L and round to bin size bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size) - Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]))^2) + Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2) LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_Beta_a,LDprofile_Beta_b),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) LrsqSum<-sum(pbeta(LrsqExp$Lrsq,LrsqExp$LDprofile_Beta_a,LrsqExp$LDprofile_Beta_b)) ##Right bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size) - Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]))^2) + Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2) RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_Beta_a,LDprofile_Beta_b),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) RrsqSum<-sum(pbeta(RrsqExp$Rrsq,RrsqExp$LDprofile_Beta_a,RrsqExp$LDprofile_Beta_b)) diff --git a/R/Zalpha_Zscore.R b/R/Zalpha_Zscore.R index e1d2749..f044925 100644 --- a/R/Zalpha_Zscore.R +++ b/R/Zalpha_Zscore.R @@ -162,12 +162,12 @@ Zalpha_Zscore<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_rsq, LDprofi ##Left # Find distances between each SNP in L and round to bin size bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size) - Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]))^2) + Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2) LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq,LDprofile_sd),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) LrsqSum<-sum((LrsqExp$Lrsq-LrsqExp$LDprofile_rsq)/LrsqExp$LDprofile_sd) ##Right bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size) - Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]))^2) + Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2) RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq,LDprofile_sd),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) RrsqSum<-sum((RrsqExp$Rrsq-RrsqExp$LDprofile_rsq)/RrsqExp$LDprofile_sd) diff --git a/R/Zalpha_all.R b/R/Zalpha_all.R index a3a2310..30d81a8 100644 --- a/R/Zalpha_all.R +++ b/R/Zalpha_all.R @@ -215,11 +215,11 @@ Zalpha_all <- function(pos, ws, x=NULL, dist=NULL, LDprofile_bins=NULL, LDprofil outputList$L_plus_R[i]<-choose(noL,2)+choose(noR,2) if (is.null(x)==FALSE){ ##Left - Lrsq <- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]))^2) + Lrsq <- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2) ##Right - Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]))^2) + Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2) ##Over - rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]))^2)[1:noL,(noL+2):(noL+noR+1)])) + rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)])) outputList$Zalpha[i]<-(sum(Lrsq)/choose(noL,2)+sum(Rrsq)/choose(noR,2))/2 outputList$Zbeta[i]<-sum(rsq)/(noL*noR) } else { diff --git a/R/Zalpha_log_rsq_over_expected.R b/R/Zalpha_log_rsq_over_expected.R index 433cfae..09e2a6b 100644 --- a/R/Zalpha_log_rsq_over_expected.R +++ b/R/Zalpha_log_rsq_over_expected.R @@ -153,13 +153,13 @@ Zalpha_log_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofi ##Left # Find distances between each SNP in L and round to bin size bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size) - Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]))^2) + Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2) Lrsq[Lrsq==0]<-min(Lrsq[Lrsq>0]) #removes zeros by replacing with lowest correlation greater than zero LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) LrsqSum<-sum(log10(LrsqExp$Lrsq/LrsqExp$LDprofile_rsq)) ##Right bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size) - Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]))^2) + Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2) Rrsq[Rrsq==0]<-min(Rrsq[Rrsq>0]) #removes zeros by replacing with lowest correlation greater than zero RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) RrsqSum<-sum(log10(RrsqExp$Rrsq/RrsqExp$LDprofile_rsq)) diff --git a/R/Zalpha_rsq_over_expected.R b/R/Zalpha_rsq_over_expected.R index e2fb25c..bffcbc7 100644 --- a/R/Zalpha_rsq_over_expected.R +++ b/R/Zalpha_rsq_over_expected.R @@ -153,12 +153,12 @@ Zalpha_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_r ##Left # Find distances between each SNP in L and round to bin size bins<-sapply(lower_triangle(outer(dist[pos>=currentPos-ws/2 & pos < currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-")),assign_bins,bin_size=bin_size) - Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]))^2) + Lrsq<- lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2) LrsqExp<-merge(data.frame(bins=as.character(bins),Lrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) LrsqSum<-sum(LrsqExp$Lrsq/LrsqExp$LDprofile_rsq) ##Right bins<-sapply(lower_triangle(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos<=currentPos+ws/2 & pos > currentPos],"-")),assign_bins,bin_size=bin_size) - Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]))^2) + Rrsq<-lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2) RrsqExp<-merge(data.frame(bins=as.character(bins),Rrsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) RrsqSum<-sum(RrsqExp$Rrsq/RrsqExp$LDprofile_rsq) diff --git a/R/Zbeta.R b/R/Zbeta.R index fbd4997..3ed2dfe 100644 --- a/R/Zbeta.R +++ b/R/Zbeta.R @@ -101,7 +101,7 @@ Zbeta <- function(pos, ws, x, minRandL = 4, minRL = 25, X = NULL) { #NA outputList$Zbeta[i]<-NA } else { - rsqSum<-sum((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]))^2)[1:noL,(noL+2):(noL+noR+1)]) + rsqSum<-sum((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)]) outputList$Zbeta[i]<-rsqSum/(noL*noR) } } diff --git a/R/Zbeta_BetaCDF.R b/R/Zbeta_BetaCDF.R index 4226618..a67ae22 100644 --- a/R/Zbeta_BetaCDF.R +++ b/R/Zbeta_BetaCDF.R @@ -159,7 +159,7 @@ Zbeta_BetaCDF<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_Beta_a, LDpr # Find distances between each SNP in L and round to bin size bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size) - rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]))^2)[1:noL,(noL+2):(noL+noR+1)])) + rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)])) rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_Beta_a,LDprofile_Beta_b),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) rsqSum<-sum(pbeta(rsqExp$rsq,rsqExp$LDprofile_Beta_a,rsqExp$LDprofile_Beta_b)) diff --git a/R/Zbeta_Zscore.R b/R/Zbeta_Zscore.R index c28e723..29ba79e 100644 --- a/R/Zbeta_Zscore.R +++ b/R/Zbeta_Zscore.R @@ -161,7 +161,7 @@ Zbeta_Zscore<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_rsq, LDprofil } else { # Find distances between each SNP in L and round to bin size bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size) - rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]))^2)[1:noL,(noL+2):(noL+noR+1)])) + rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)])) rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq,LDprofile_sd),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) rsqSum<-sum((rsqExp$rsq-rsqExp$LDprofile_rsq)/rsqExp$LDprofile_sd) diff --git a/R/Zbeta_log_rsq_over_expected.R b/R/Zbeta_log_rsq_over_expected.R index cf00bc1..e257c67 100644 --- a/R/Zbeta_log_rsq_over_expected.R +++ b/R/Zbeta_log_rsq_over_expected.R @@ -153,7 +153,7 @@ Zbeta_log_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofil # Find distances between each SNP in L and round to bin size bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size) - rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]))^2)[1:noL,(noL+2):(noL+noR+1)])) + rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)])) rsq[rsq==0]<-min(rsq[rsq>0]) #removes zeros by replacing with lowest correlation greater than zero rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) rsqSum<-sum(log10(rsqExp$rsq/rsqExp$LDprofile_rsq)) diff --git a/R/Zbeta_rsq_over_expected.R b/R/Zbeta_rsq_over_expected.R index a6ecc13..30bb6a0 100644 --- a/R/Zbeta_rsq_over_expected.R +++ b/R/Zbeta_rsq_over_expected.R @@ -153,7 +153,7 @@ Zbeta_rsq_over_expected<-function(pos, ws, x, dist, LDprofile_bins, LDprofile_rs # Find distances between each SNP in L and round to bin size bins<-sapply(outer(dist[pos<=currentPos+ws/2 & pos > currentPos],dist[pos>=currentPos-ws/2 & pos < currentPos],"-"),assign_bins,bin_size=bin_size) - rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]))^2)[1:noL,(noL+2):(noL+noR+1)])) + rsq<-as.vector(t((cor(t(x[pos>=currentPos-ws/2 & pos<=currentPos+ws/2,]),use="pairwise.complete.obs")^2)[1:noL,(noL+2):(noL+noR+1)])) rsqExp<-merge(data.frame(bins=as.character(bins),rsq),data.frame(LDprofile_bins=as.character(LDprofile_bins),LDprofile_rsq),by.x="bins",by.y="LDprofile_bins",all.x=TRUE,sort=FALSE) rsqSum<-sum(rsqExp$rsq/rsqExp$LDprofile_rsq) outputList$Zbeta_rsq_over_expected[i]<-rsqSum/(noL*noR) diff --git a/R/zalpha.R b/R/zalpha.R index 6f22769..c7cc51d 100644 --- a/R/zalpha.R +++ b/R/zalpha.R @@ -103,9 +103,9 @@ Zalpha <- function(pos, ws, x, minRandL = 4, minRL = 25, X = NULL) { outputList$Zalpha[i]<-NA } else { ##Left - LrsqSum<- sum(lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]))^2)) + LrsqSum<-sum(lower_triangle(cor(t(x[pos>=currentPos-ws/2 & pos < currentPos,]),use="pairwise.complete.obs")^2)) ##Right - RrsqSum<-sum(lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]))^2)) + RrsqSum<-sum(lower_triangle(cor(t(x[pos<=currentPos+ws/2 & pos > currentPos,]),use="pairwise.complete.obs")^2)) outputList$Zalpha[i]<-(LrsqSum/choose(noL,2)+RrsqSum/choose(noR,2))/2 } } -- GitLab