# tools for probit analysis # version 0.0.1 2006-03-28 first online release, rather minimal # version 0.0.2 2006-03-28 nasty bug in genProbit fixed # version 0.1.0 2006-03-28 combined two algorithmic blocks # version 0.2.0 2006-03-28 added linear thresholds # version 0.3.0 2006-03-28 added variable argument for regression and pca # version 0.4.0 2006-03-28 added regression # version 0.5.0 2006-03-28 added PCA # version 0.5.1 2006-03-28 improved, but not fixed, silly iteration structure # version 0.5.2 2006-03-29 iteration information now makes sense # version 0.5.3 2006-03-29 bug fixes # version 0.5.4 2006-03-30 modified threshold iteration for linear case # version 0.6.0 2006-03-30 introduced profile frequencies # version 0.6.1 2006-03-30 function did not return anything ! # version 0.6.2 2006-03-31 bug fixes involving freq # version 0.7.0 2006-03-31 split into two threshold methods # version 0.7.1 2006-03-31 modulus for threshold iterations # genProbit<-function( mat, freq=rep(1,dim(mat)[1]), eps=1e-6, itmax=100, mod=1, verbose=FALSE, proj=pIdent, thres=repList("all",dim(mat)[2]), extra=NULL ) { mat<-as.data.frame(mat); ncat<-as.vector(apply(mat,2,max)); tau<-thres n<-dim(mat)[1]; m<-dim(mat)[2]; itel<-1; tht<-rep(1,m); eta<-matrix(0,n,m); fo<-Inf for (j in 1:m) { cs<-cumsum(sapply(1:ncat[j],function(l) sum(freq[which(mat[,j]==l)]))) tau[[j]]<-qnorm(cs[-length(cs)]/cs[length(cs)]) if (is.double(thres[[j]])) { bb<-lsfit(thres[[j]],tau[[j]])$coef tht[j]<-bb[2]; eta[,j]<-bb[1] tau[[j]]<-thres[[j]]*tht[j] } } eta<-matrix(-21,n,1) repeat { h<-matrix(0,n,m) for (j in 1:m) { op<-outer(eta[,j],tau[[j]],"+") dn<-dnorm(op); pn<-pnorm(op) pp<-colDiff(cbind(0,pn,1)); pd<-colDiff(cbind(0,dn,0)) gg<--(pd/pp) h[,j]<-sapply(1:n,function(i) gg[i,mat[[j]][i]]) } eta<-proj(eta-h,freq,extra); fs<-rep(0,n) for (j in 1:m) { op<-outer(eta[,j],tau[[j]],"+"); pn<-pnorm(op); pp<-colDiff(cbind(0,pn,1)) fs<-fs-2*log(sapply(1:n,function(i) pp[i,mat[[j]][i]])) } fe<-sum(freq*fs) if (itel%%mod == 0) { ft<-0 for (j in 1:m) { if (is.double(thres[[j]])) { th<-thresProp(mat[[j]],eta[,j],tht=tht[j],thres=thres[[j]],freq=freq,verbose=TRUE) tht[j]<-th$tht; tau[[j]]<-tht[j]*thres[[j]]; ft<-ft+th$ff } else { th<-thresFree(mat[[j]],eta[,j],tau=tau[[j]],freq=freq,verbose=FALSE) tau<-th$tau; ft<-ft+th$f } } } else ft<-fe if (verbose) cat( "Iteration: ",formatC(itel,width=3, format="d"), "O-Function: ",formatC(fo,digits=8,width=12,format="f"), "E-Function: ",formatC(fe,digits=8,width=12,format="f"), "T_Function: ",formatC(ft,digits=8,width=12,format="f"), "\n") if (((fo - ft) < eps) || (itel == itmax)) break itel<-itel+1; fo<-ft } return(list(tau=tau,tht=tht,eta=eta)) } pIdent<-function(x,freq,extra) return(x) pAve<-function(x,freq,extra) { for (j in 1:dim(x)[2]) x[,j]<-weighted.mean(x[,j],freq) return(x) } pRegres<-function(x,freq,extra) return(apply(x,2,function(y) y-(lsfit(extra,y,wt=freq)$residuals))) pPCA<-function(x,freq,extra) return(rankApprox(x,freq,extra)) thresFree<-function(mat,eta,tau,freq,eps=1e-6,itmax=100,verbose=FALSE) { k<-length(tau); n<-length(mat); itel<-1 repeat{ ar<-outer(eta,tau,"+") pp<-colDiff(cbind(0,pnorm(ar),1)) f<--2*sum(freq*log(sapply(1:n,function(i) pp[i,mat[i]]))) dn<-dnorm(ar); rs<-matrix(0,n,k+1) for (i in 1:n) rs[i,mat[i]]<-1/pp[i,mat[i]]; ss<-rs/pp gg<--2*sapply(1:k,function(l) sum(freq*dn[,l]*(rs[,l]-rs[,l+1]))) mg<-max(abs(gg)) if (verbose) cat( "** Iteration: ",formatC(itel,width=3, format="d"), "** Function: ",formatC(f,digits=8,width=12,format="f"), "** MaxGrad: ",formatC(mg,digits=8,width=12,format="f"), "\n") if ((mg < eps) || (itel == itmax)) break dg<-2*sapply(1:k,function(l) sum(freq*ar[,l]*dn[,l]*(rs[,l]-rs[,l+1]))) dg<-dg+2*sapply(1:k,function (l) sum(freq*(dn[,l]^2)*(ss[,l]+ss[,l+1]))) dd<-diag(dg) for (i in 1:k-1) dd[i,i+1]<-dd[i+1,i]<--2*sum(freq*dn[,i]*dn[,i+1]*ss[,i+1]) tau<-tau-solve(dd,gg) itel<-itel+1 } return(list(tau=tau,f=f)) } thresProp<-function(mat,eta,tht,thres,freq,eps=1e-6,itmax=100,verbose=FALSE) { k<-length(thres); n<-length(mat); itel<-1 repeat{ ar<-tht*outer(eta,thres,"+"); dn<-dnorm(ar); pn<-pnorm(ar) p0<-colDiff(cbind(0,pn,1)) p1<-colDiff(cbind(0,colScal(dn,thres),0)) p2<-colDiff(cbind(0,colScal(dn*ar,thres^2),0)) ff<--2*sum(freq*log(sapply(1:n,function(i) p0[i,mat[i]]))) gg<--2*sum(freq*sapply(1:n,function(i) (p1/p0)[i,mat[i]])) da<-2*sum(freq*sapply(1:n,function(i) (p2/p0)[i,mat[i]])) dd<-da+2*sum(freq*sapply(1:n,function(i) ((p1/p0)^2)[i,mat[i]])) if ((abs(gg) < eps) || (itel == itmax)) break thu<-tht-(gg/dd) if (gg > 0) tht<-tht/2 else tht<-thu if (verbose) cat( "** Iteration: ",formatC(itel,width=3, format="d"), "** Value: ",formatC(tht,digits=8,width=12,format="f"), "** Function: ",formatC(ff,digits=8,width=12,format="f"), "** FirstDer: ",formatC(gg,digits=8,width=12,format="f"), "** SeconDer: ",formatC(dd,digits=8,width=12,format="f"), "\n") itel<-itel+1 } return(list(tht=tht,ff=ff)) } thresFunc<-function(mat,eta,tht,thres,freq) { ar<-tht*outer(eta,thres,"+"); dn<-dnorm(ar); pn<-pnorm(ar) p0<-colDiff(cbind(0,pn,1)) return(-2*sum(freq*log(sapply(1:length(mat),function(i) p0[i,mat[i]])))) } thresGrad<-function(mat,eta,tht,thres,freq) { ar<-tht*outer(eta,thres,"+"); dn<-dnorm(ar); pn<-pnorm(ar) p0<-colDiff(cbind(0,pn,1)) p1<-colDiff(cbind(0,colScal(dn,thres),0)) return(-2*sum(freq*sapply(1:length(mat),function(i) (p1/p0)[i,mat[i]]))) } rankApprox<-function(x,p) { s<-svd(x) return(s$u[,1:p]%*%(s$d[1:p]*t(s$v[,1:p]))) } colDiff<-function(x) { return(matrix(t(apply(x,1,diff)),dim(x)[1],dim(x)[2]-1)) } colScal<-function(x,y) { return(sapply(1:dim(x)[2],function(j) x[,j]*y[j])) } repList<-function(x,n) { z<-list() for (i in 1:n) z<-c(z,list(x)) return(z) }