#get the updated version of the motmot package from github that includes the psi functions #library(devtools) #install_github(repo="motmot", username="ghthomas", ref="motmot_psi") library(motmot) #phytools used for labeling branches library(phytools) #laser used for fitting birth-death model to get lambda library(laser) #modified laser:::bd function to optimize lambda and mu for a given mu/lambda bdFixedA<-function (x, a = 0, maxr = 10){ N <- length(x) + 1 b <- sort(x) z <- rev(c(b[1], diff(b))) x <- c(0, x) res <- list() mlbd <- function(r) { -(sum(log(1:(N - 1))) + ((N - 2) * log(r)) + (r * sum(x[3:N])) + (N * log(1 - a)) - 2 * sum(log(exp(r * x[2:N]) - a))) } temp<-optimize(mlbd, interval = c(0,maxr)) res$LH <- -temp$objective res$r1 <- temp$minimum res$a <- a res$aic <- (-2 * res$LH) + 4 return(res) } #basic functions for AIC/Akaike weights getAIC<-function(L, np, n, AICc = TRUE){ result <- (-2) * (L) + 2 * np if(AICc) result <- result + 2 * np * ((np + 1) / (n - np - 1)) result } getAICweights<-function(AIC){ dAIC <- AIC - min(AIC) exp(-dAIC / 2) / sum(exp(-dAIC / 2)) } #read in the tree and data files for Sebastes rockfishes data(sebastes.tree) tree<-sebastes.tree data(sebastes.data) dat<-sebastes.data name.check(tree,dat) #prune tree to include only northeast Pacific species, but retain information about which branches in the pruned tree contain 'missing' nodes dat<-dat[dat$inNEP==TRUE,] treed<-dropTipSobs(tree,which(tree$tip.label%in%rownames(dat)==FALSE)) #treed is a phylo object with extra element Sobs (number of 'observed' speciation events per branch) #maked two data matrices matched to the tree tip order: #one with (square-root transformed average adult) depth data only x<-matrix(dat[treed$tip.label,"depth"],dimnames=list(treed$tip.label,"y")) #and one with both depth and gill raker number for trait comparison x2<-as.matrix(dat[treed$tip.label,c("depth","GRN")]) #can optionally add measurement error (either known, or a fraction of the among-species variance), but this is not handled for multivariate data se<-x;se[]<-0*sd(x[,1]) #paint branches of a subclade of interest for clade comparison - just as an illustration, we're separating a 31-species subclade restricted to the northeast Pacific from the "background" branches that also include northwest Pacific and trans-Pacific species treedA<-paintBranches(treed, edge=getDescendants(treed, node=getMRCA(treed, c("Sebastes_jordani","Sebastes_pinniger"))), state="2", anc.state="1") branchLabels<-unlist(sapply(treedA$maps,names)) plot(treed, no.margin=T, show.tip.label=T, edge.width=2, edge.color=c("blue", "red")[as.numeric(branchLabels)]) #estimate lambda and mu using bd in the laser package ra<-bd(as.numeric(branching.times(treed))) LA<-as.numeric(ra$r1/(1-ra$a));MU<-as.numeric(ra$r1/(1/ra$a-1)) MU #mu is estimated to be zero, so we won't bothering sampling 'hidden' speciation events for now (can reanalyze after enforcing nonzero extinction) bm<-transformPhylo.ML(x,treed,model="bm",meserr=se) psi<-transformPhylo.ML(x,treed,model="psi",meserr=se,la=LA) twopsi<-transformPhylo.ML(x,treed,model="multipsi",meserr=se, la=LA,branchLabels=branchLabels) #to look at the likelihood surface for psi logl<-numeric() xx<-seq(0,1,by=0.025) for(i in 1:length(xx)) logl[i]<-transformPhylo.ll(x,treed,model="psi",meserr=se,psi=xx[i],la=LA)$logL[1,1] plot(xx,logl,type="l",xlab=expression(paste(psi)),ylab="logL", lwd=2,cex.lab=1.3) points(psi$psi[1],psi$Max) abline(h=psi$Max-1.92) #separate likelihood surfaces for each clade xx<-seq(0,1,by=0.025) logl<-matrix(NA,length(xx),length(twopsi$psi[,1])) for(k in 1:length(twopsi$psi[,1])){ for(i in 1:length(xx)){ psivals<-twopsi$psi[,1];psivals[k]<-xx[i] logl[i,k]<-transformPhylo.ll(x,treed,model="multipsi",meserr=se,psi=psivals,la=LA, branchLabels=branchLabels)$logL[1,1] } } plot(NA, xlim=c(0,1), ylim=range(logl), ylab="relative logL", xlab=expression(paste(psi))) for(i in 1:length(twopsi$psi[,1]))lines(xx, logl[,i], col=c("blue","red")[i]) points(twopsi$psi[,1], rep(twopsi$Max,2), col=c("blue","red")) abline(h=twopsi$Max-1.92) #likelihood ratio tests for psi vs BM, and for twopsi vs psi round(c(LR=2*(psi$Max-bm$logL), p=pchisq(2*(psi$Max-bm$logL),1,lower.tail=F)),3) round(c(LR=2*(twopsi$Max-psi$Max), p=pchisq(2*(twopsi$Max-psi$Max),1,lower.tail=F)),3) #AIC, delta-AIC and Akaike weights for each model aic<-getAIC(c(bm$logL, psi$Max, twopsi$Max), np=c(2,3,4), AICc=T, n=66) aic-min(aic) getAICweights(aic) #calculate the rate parameter at the ML values (this could be part of the output from transformPhylo.ML). s2t<-likTraitPhylo(x, transformPhylo(phy = treed, model = "multipsi", psi = twopsi$psi[,1], meserr = se, y = x, la = LA, branchLabels = branchLabels))[[1]][1] #s2t (total rate of evolution), psi and lambda can be reparameterized to the Brownian rate (s2a) and the speciational step size distribution (s2c) s2c<-s2t*(psi$psi[1]/(2*LA)) s2a<-s2t*(1-psi$psi[1]) #visualize how the tree looks under the multipsi model: it's subtle in this case as the clades have similar psis, but the model can result in one clade with branches proportional to time and another clade with equal branches plot(transformPhylo(treed, model="multipsi", meserr=se, y=x, psi=twopsi$psi[,1], la=LA, branchLabels = branchLabels), no.margin=T, show.tip.label=F, edge.width=2, edge.color=c("blue","red")[as.numeric(branchLabels)]) #to test whether the speciational component differs between two traits (i.e. they have different psi values). To compare the likelihoods of models in which two traits are fit separately or together, it's necessary to set "covPIC = FALSE" so that the off-diagonal elements of the PIC matrix are zero for both models. psi1<-transformPhylo.ML(x2, treed, model="psi", la=LA, covPIC=FALSE) psi2d<-transformPhylo.ML(x2[,1,drop=F], treed, model="psi", la=LA) psi2g<-transformPhylo.ML(x2[,2,drop=F], treed, model="psi", la=LA) #LR test for the 2-psi vs single-psi model, adding likelihoods of the individual fits to get a combined likelihood to compare to the shared fit c(LR=2*((psi2d$Max+psi2g$Max)-psi1$Max), p=pchisq(2*((psi2d$Max+psi2g$Max)-psi1$Max), 1, lower.tail=F)) #and AIC comparisons aic12<-getAIC(c(psi1$Max, psi2d$Max+psi2g$Max), np=c(3,4), AICc=T, n=66) aic12-min(aic12) getAICweights(aic12) #separate likelihood curves for each trait, showing that the psi estimate for depth is considerably higher than for gill raker number xx<-seq(0,1,by=0.025) logl<-matrix(NA,length(xx),3,dimnames=list(NULL,c("depth","GRN","shared"))) for(i in 1:length(xx)){ logl[i,1]<-transformPhylo.ll(x2[,1,drop=F],treed, model="psi",psi=xx[i],la=LA)$logL[1,1]+psi2g$Max logl[i,2]<-transformPhylo.ll(x2[,2,drop=F],treed, model="psi",psi=xx[i],la=LA)$logL[1,1]+psi2d$Max logl[i,3]<-transformPhylo.ll(x2,treed, model="psi",psi=xx[i],la=LA)$logL[1,1] } logl<-apply(logl,2,function(x)x-max(x)) plot(NA,xlim=c(0,1),ylim=range(logl),ylab="relative logL", xlab=expression(paste(psi)),xaxt="n",cex.lab=1.3,cex.axis=1.3) axis(1,at=seq(0,1,by=0.25),cex.axis=1.3) for(i in 1:3) lines(xx,logl[,i],lwd=3,col=c("orange","forestgreen","black")[i]) points(c(psi2d$psi[1], psi2g$psi[1], psi1$psi[1]), rep(0,3), col=c("orange","forestgreen","black"), pch=21, bg="white", cex=1.5, lwd=2) abline(h=0-1.92,lty="dashed") #finally, to see how higher extinction affects the estimation we can enforce a high relative extinction rate (eg. a = mu/lambda = 0.5), which may be estimated from the fossil record or we may just want to test sensitivity to different rates of turnover ra2<-bdFixedA(as.numeric(branching.times(treed)), a=0.5) LA2<-as.numeric(ra2$r1/(1-ra2$a));MU2<-as.numeric(ra2$r1/(1/ra2$a-1)) LA2;MU2 #sample numbers of "hidden" speciation events (Shid) on each branch (hidden by later extinction of one lineage). This will use Poisson sampling to place an integer number of Shid on each branch; to use the expected number (potentially non-integer), set "useMean = TRUE" treed2<-sampleShid(treed, la=LA2, mu=MU2, useMean = FALSE) treed2$Shid bm2<-transformPhylo.ML(x, treed2, model="bm", meserr=se) psi2<-transformPhylo.ML(x, treed2, model="psi", meserr=se, la=LA) twopsi2<-transformPhylo.ML(x, treed2, model="multipsi", meserr=se, la=LA, branchLabels=branchLabels) #the effect on the estimates will usually be relatively minor, though this depends on our assumption that lambda and mu are constant across the tree psi;psi2 twopsi;twopsi2 #to account for uncertainty in the number of hidden speciation events per branch, one option is to iterate this for many realizations of Shid and average results similar to averaging across a sample of trees bmSh<-psiSh<-twopsiSh<-psi1Sh<-psi2dSh<-psi2gSh<-list() for(i in 1:100){ treedSh<-sampleShid(treed,la=LA2,mu=MU2) bmSh[[i]]<-transformPhylo.ML(x,treedSh,model="bm",meserr=se) psiSh[[i]]<-transformPhylo.ML(x,treedSh,model="psi",meserr=se,la=LA) twopsiSh[[i]]<-transformPhylo.ML(x,treedSh,model="multipsi", meserr=se,la=LA,branchLabels=branchLabels) psi1Sh[[i]]<-transformPhylo.ML(x2,treedSh,model="psi",la=LA2) psi2dSh[[i]]<-transformPhylo.ML(x2[,1,drop=F],treedSh,model="psi",la=LA2) psi2gSh[[i]]<-transformPhylo.ML(x2[,2,drop=F],treedSh,model="psi",la=LA2) } #summarize the results for the psi and clade-psi model fits (delta-AICc and Akaike weights) summary(sapply(psiSh,function(x)x$psi[1])) lnls<-cbind(sapply(bmSh,function(x)x$logL[1]),sapply(psiSh,function(x)x$Max[1]),sapply(twopsiSh,function(x)x$Max[1])) aics<-t(apply(lnls,1,function(x)getAIC(x,2:4,66))) daics<-matrix(t(apply(aics,1,function(x)x-min(x))),ncol=3,dimnames=list(NULL,c("bm","psi","multipsi"))) apply(daics,2,summary) apply(t(apply(daics[,2:3],1,function(x)getAICweights(x))),2,summary) #summarize the results for the trait-psi and the shared-psi models aic12<-daic12<-wA12<-matrix(NA,100,2,dimnames=list(NULL,c("psi1","psi2"))) for(i in 1:100){ aic12[i,]<-getAIC(c(psi1Sh[[i]]$Max,psi2dSh[[i]]$Max+psi2gSh[[i]]$Max),np=c(3,4),AICc=T,n=66) daic12[i,]<-aic12[i,]-min(aic12[i,]) wA12[i,]<-getAICweights(aic12[i,]) } apply(daic12,2,summary) sum(daic12[,2]