library(ape);library(geiger);library(laser);library(subplex) #first read in all functions below, starting with 'get_psi' #load rockfish data and tree seb<-read.csv("rockfish_depth.csv",row.names=1) sebtree<-read.tree("rockfish.phy") #ensure data and tree names match all(rownames(seb)==sebtree$tip.label) M_psi<-get_psi(dat=seb[,1],tree=sebtree,dat_se=NULL,fix_a=NULL,conf_int=FALSE) #fit alternate candidate models M_bm<-fitContinuous(sebtree,seb[,1],model="BM") M_eb<-fitContinuous(sebtree,seb[,1],model="EB") M_ou<-fitContinuous(sebtree,seb[,1],model="OU") #compare fit using AIC weights get_AIC_weights(c(M_psi[4],M_bm$Trait1$aicc,M_eb$Trait1$aicc,M_ou$Trait1$aicc),name=c("psi","bm","eb","ou")) #impose a higher rate of turnover (mu/lambda) rather than use the ML value (zero) M_psi2<-get_psi(dat=seb[,1],tree=sebtree,dat_se=NULL,fix_a=0.25,conf_int=FALSE) #get the upper and lower confidence limits on psi using marginal likelihood (warning - slow) M_psi3<-get_psi(dat=seb[,1],tree=sebtree,dat_se=NULL,fix_a=NULL,conf_int=TRUE) #for data with NA's in it, the missing species will be dropped, but first the nodes that are being deleted are added to the known speciation events for the remaining branches (Sobs). seb2<-seb;seb2[sample(1:66,10),1]<-NA M_psi4<-get_psi(dat=seb2[,1],tree=sebtree,dat_se=NULL,fix_a=NULL,conf_int=FALSE) #####the main function - calls on lots of others########### #dat should be a vector either matched up to the tree tip.labels or with identical names. dat_se can be a vector of std errs the same length as dat if available. fix_a can be a single value for the turnover (mu/lambda) to impose (rather than estimate both lambda and mu). conf_int is very slow in my coding get_psi<-function(dat,tree,dat_se=NULL,fix_a=NULL,conf_int=FALSE){ nsp_tree<-length(tree$tip.label) ltt<-branching.times(tree) #estimate ML speciation and extinction rates...if a turnover rate was provided (mu/lambda), use it and get the ML lambda if(!is.null(fix_a)){ ra<-suppressWarnings(subplex(0.5,L_b,a=fix_a,N=length(ltt),x=ltt)) lamu<-c(lambda=ra$par[1]/(1-fix_a),mu=ra$par[1]/(1/fix_a-1),LH=-ra$value,r=ra$par[1],a=fix_a) }else{ ra<-suppressWarnings(subplex(c(0.5,0.1),L_bd,N=length(ltt),x=ltt)) lamu<-c(lambda=ra$par[1]/(1-ra$par[2]),mu=ra$par[1]/(1/ra$par[2]-1),LH=-ra$value,r=ra$par[1],a=ra$par[1]) #used a bounded search with a >=1e-8 - assume if this is the estimate of a it is actually zero (makes no meaningful difference) if(ra$par[2]<1e-7){ ra<-suppressWarnings(subplex(0.5,L_b,a=0,N=length(ltt),x=ltt)) lamu<-c(lambda=ra$par[1]/(1-0),mu=ra$par[1]/(1/0-1),LH=-ra$value,r=ra$par[1],a=0) } } #prune data and tree if any NAs, but keep the known speciation events (stored in 'So') tree1<-tree;tree1$edge.length[]<-1 if(is.null(dat_se))dat_se<-rep(0,nsp_tree) if(any(is.na(dat))){ tree<-drop.tip(tree,which(is.na(dat)));tree1<-drop.tip(tree1,which(is.na(dat))) dat_se<-dat_se[-which(is.na(dat))] dat<-dat[-which(is.na(dat))] } nsp<-length(dat) Sobs<-tree1$edge.length Shid<-get_Sh(lamu,tree) Shid[is.na(Shid)]<-0 #get error for zero-branch lengths if(lamu[2]==0)Shid[]<-0 #get_Sh leads to slightly non-zero Shid even for mu=0 (order of 1e-16) #get BM rate parameter for starting point of search BMrate<-fitContinuous(tree,dat,meserr=dat_se,model="BM")$Trait1$beta print("estimating psi") psi_out<-get_Sac(rate0=BMrate,pC0=0.1,get_ML=TRUE,LaMu=lamu,phy=tree,yy=dat,meserr=dat_se,So=Sobs,Sh=Shid) AICc<-get_AIC(-psi_out$value,k=2,AICc=TRUE,n=nsp) if(conf_int){ print("getting confidence interval") crit<-psi_out$value+1.92 f<-function(pC){ g<-function(tR)get_Sac(tR,pC,get_ML=FALSE,LaMu=lamu,phy=tree,yy=dat,meserr=dat_se,So=Sobs,Sh=Shid) optimize(g,c(0,10),maximum=FALSE)$objective-crit } if(f(0)>0){ psi_CI_L<-uniroot(f,c(0,psi_out$par[2]))$root }else{ psi_CI_L<-0 } if(f(1)>0){ psi_CI_U<-uniroot(f,c(psi_out$par[2],1))$root }else{ psi_CI_U<-1 } }else{ psi_CI_L<-psi_CI_U<-NA } c(psi=psi_out$par[2],totRate=psi_out$par[1],LnL=-psi_out$value,AICc=AICc,psi_CI_L=psi_CI_L,psi_CI_U=psi_CI_U,lamu[1],lamu[2],Nsp=nsp,Nsp_tree=nsp_tree) #end of function } #component functions# ################################################################ #supply the raw log-likelihoods as well as total np and if AICc get_AIC<-function(LHs,k=2,AICc=FALSE,n=NULL){ ifelse(AICc,2*k*(n-1)/(n-k-2),2*k)+2*sum(-LHs) } get_AIC_weights<-function(AIC,name=NULL){ dAIC<-AIC-min(AIC) AICw<-exp(-dAIC/2)/sum(exp(-dAIC/2)) if(!is.null(name))names(AICw)<-name AICw } L_bd<- function(v,N,x){ r <- v[1] a <- v[2] if(r<1e-8|a<1e-8){ res<-Inf }else{ res<- -(sum(log(1:(N - 1))) + ((N - 2) * log(r)) + (r * sum(x[3:N])) + (N * log(1 - a)) - 2 * sum(log(exp(r * x[2:N])-a))) } res } L_b<- function(r,a=0,N,x){ if(r<1e-8){ res<-Inf }else{ res<- -(sum(log(1:(N - 1))) + ((N - 2) * log(r)) + (r * sum(x[3:N])) + (N * log(1 - a)) - 2 * sum(log(exp(r * x[2:N])-a))) } res } get_So<-function(tree,prEvolSo=1){ expSo<-rep(prEvolSo,dim(tree$edge)[1]) expSo } get_Sh<-function(lamu,tree,prEvolSh=1){ nodedf<-getbranches(tree) meanalpha<-Calc_Mean_Alpha(nodedf[,3],nodedf[,4],lamu[1],lamu[2]) expSh<-prEvolSh*meanalpha*lamu[1]*(nodedf[,3]-nodedf[,4]) # if(lamu[2]==0)expSh<-rep(0,length(alphaa)) expSh } Calc_Mean_Alpha<-function(a,b,lambda,mu){ (-(a*lambda+mu*a-log(lambda*exp(a*lambda)-mu*exp(mu*a))-b*lambda-mu*b+log(lambda*exp(b*lambda)-mu*exp(mu*b)))/lambda)/(b-a) } #rate0=vector w length nTrait;pC0=scalar or vector w length nTrait get_Sac<-function(rate0,pC0,get_ML=TRUE,LaMu=lamu,phy=tree,yy=y,meserr=NULL,So=Sobs,Sh=Shid){ yy<-data.frame(yy) nTrait<-dim(yy)[2] nsp<-dim(yy)[1] if(length(rate0)!=nTrait|length(pC0)%in%c(nTrait,1)==F)stop("error in initial values") diffpC<-ifelse(length(pC0)==nTrait,TRUE,FALSE) if(is.null(meserr))meserr<-rep(0,nTrait) if(!is.data.frame(meserr))meserr<-as.data.frame(matrix(meserr,ncol=nTrait,nrow=nsp)) vcv_a<-vcv.phylo(phy) tempphy<-phy;tempphy$edge.length<-Sh+So vcv_c<-vcv.phylo(tempphy) if(get_ML==TRUE){ res<-subplex(c(rate0,pC0),L_Sac,control=list(parscale=c(rate0*0.1,rep(0.1,length(pC0)))),nT=nTrait,dpC=diffpC,vcva=vcv_a,vcvc=vcv_c,y=yy,meserr=meserr,la=as.numeric(LaMu[1])) }else{ res<-L_Sac(c(rate0,pC0),nT=nTrait,dpC=diffpC,vcva=vcv_a,vcvc=vcv_c,y=yy,meserr=meserr,la=as.numeric(LaMu[1])) } res } L_Sac<-function(x,nT=1,dpC=FALSE,vcva=vcv_a,vcvc=vcv_c,y=y,meserr=0,la=0){ if(dpC==FALSE)x<-c(x,rep(x[nT+1],nT-1)) if(any(x[1:nT]<=0)|any(x[(nT+1):(nT+nT)]<0)|any(x[(nT+1):(nT+nT)]>1)){ L<--Inf }else{ L<-0 for(i in 1:nT){ Sa<-x[i]*(1-x[nT+i]) Sc<-x[i]*(x[nT+i]/la) vv<-vcva*Sa+vcvc*Sc diag(vv)<-diag(vv)+meserr[,i]^2 ymu<-rep(phylogMean(vv,y[,i]),dim(vv)[1]) L<-L+dmvnorm(y[,i],ymu,vv,log=T) } } -L } getbranches<-function(tree){ Ntip<-length(tree$tip.label) Nedge<-dim(tree$edge)[1] Nnode<-tree$Nnode tree2<-tree if(is.null(tree2$node.label))tree2$node.label<-names(branching.times(tree2)) tempnode<-tree2$node.label tree2$node.label<-NULL tree<-new2old.phylo(tree) nodedepth<-branching.times(tree2) stemdepth<-numeric() stemdepth[1]<-nodedepth[1] for(i in 2:Nnode){ anc<-which(as.numeric(tree$edge[,2])==-i) stemdepth[i]<-nodedepth[names(nodedepth)==tree2$edge[anc,1]] } tree<-tree2 nodes<-which(tree$edge[,2]>Ntip) tips<-which(tree$edge[,2]<=Ntip) tipnotnode<-rep(0,Nedge) tipnotnode[tips]<-1 tstart<-tend<-numeric(Nedge) edgename<-character(Nedge) tstart[nodes]<-stemdepth[-1];tstart[tips]<-tree$edge.length[tips] tend[nodes]<-nodedepth[-1];tend[tips]<-rep(0,Ntip) edgename[nodes]<-tempnode[-1];edgename[tips]<-tree$tip.label return(data.frame(tipnotnode,edgename,tstart,tend)) } #from geiger I think phylogMean<-function(phyvcv, data) { o<-rep(1, length(data)) ci<-solve(phyvcv) m1<-solve(t(o) %*% ci %*% o) m2<-t(o) %*% ci %*% data return(m1 %*% m2) }