# # singlePeaked package # Copyright (C) 2006 Jan de Leeuw # UCLA Department of Statistics, Box 951554, Los Angeles, CA 90095-1554 # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ################################################################### # # version 0.0.1, 2006-05-12, first alpha # version 0.0.2, 2006-05-14, fixed stripCA # singlePeaked<-function( dframe, # data frame ndim=2, # dimensionality outeps=1e-4, inseps=1e-4, inbeps=1e-4, biasFunc=addBias, # addBias, rowBias, colBias, regRowBias, regColBias, regFullBias biasMat=FALSE, outmax=500, insmax=1, inbmax=1, offset=1.20, rowplot=FALSE, colplot=FALSE, movieplot=FALSE, movielab=FALSE, moviequick=FALSE, ffmpeg=moviequick, jointplot=FALSE, verbose=TRUE, outwrite=TRUE, objwrite=FALSE, timer=FALSE) { if (timer) stime<-proc.time() name<-deparse(substitute(dframe)); nvar<-ncol(dframe); nobj<-nrow(dframe) mis<-ifelse(is.na(dframe),0,1); itel<-1 z<-stripCA(dframe,ndim); x<-z$x; y<-z$y phi<-tcrossprod(x,y); ep<-exp(phi); bf<-biasFunc(ep,init=FALSE,regr=biasMat,verbose=FALSE,eps=inbeps,itmax=inbmax) xi<-bf$xi; b<-bf$b; lb<-b*ep; pi<-lb/(1+lb) pdev<-2*(sum(mis*log(lb))-sum(mis*log(1+lb))) if (moviequick) system(paste("mkdir ",name,"_movie",sep="")) if (outwrite) outfile<-file(paste(name,"out",sep="."),"w") if (timer) itime<-proc.time() if (outwrite) cat("Iterations:\n",file=outfile) repeat { bf<-biasFunc(ep,init=xi,regr=biasMat,verbose=FALSE,eps=inbeps,itmax=inbmax) xi<-bf$xi; b<-bf$b; lb<-b*ep devab<-2*(sum(mis*log(lb))-sum(mis*log(1+lb))) targ<-phi+(dframe-lb)/a uu<-stripSPRect(targ,mis,ndim,x,y) if (movieplot) myMoviePlot(x,uu$x,movielab) if (moviequick) { setwd(paste(name,"_movie",sep="")) jpeg(paste("file",as.character(itel),".jpg",sep="")) myMoviePlot(x,uu$x,movielab) dev.off() setwd("..") } x<-uu$x; y<-uu$y; phi<-tcrossprod(x,y); ep<-exp(phi); lb<-ab*ep devxy<-2*(sum(mis*log(lb))-sum(mis*log(1+lb))) if (verbose) writeIter(itel,pdev,devab,devxy) if (outwrite) writeIter(itel,pdev,devab,devxy,myfile=outfile) if ((itel == outmax) || ((pdev-devxy) < outeps)) break() itel<-itel+1; pdev<-devxy } if (timer) ctime<-proc.time() if (ffmpeg) { setwd(paste(name,"_movie",sep="")) system(paste("ffmpeg -i file%d.jpg ",name,".mov",sep="")) system("mv *.mov ..") system("rm *.jpg") setwd("..") system(paste("rm -rf ",name,"_movie",sep="")) } ez<-eigen(crossprod(y)); ezval<-sqrt(sqrt(ez$values)); ezvec<-ez$vectors x<-sqrt(sqrt(nrow/ncol))*x%*%ezvec%*%diag(ezval); y<-sqrt(sqrt(ncol/nrow))*y%*%ezvec%*%diag(1/ezval) if (rowplot) myOnePlot(x,"rowplot",name,bias,"Row Objects",labels(indi)[[1]]) if (colplot) myOnePlot(y,"colplot",name,bias,"Column Objects",labels(indi)[[2]]) if (jointplot) myTwoPlot(x,y,"jointplot",name,bias, "Column Objects (Red) Row Objects (Green)",labels(indi)[[1]],labels(indi)[[2]]) #if (outwrite) { # writeHead(name,vname[j],ndim,active[j],rank[j],level[j],outfile) # writeY(data[,j],y,"Y",outfile) # writeY(data[,j],c,"C",outfile) # writeY(data[,j],u,"Z",outfile) # writeA(t(a),outfile) # } if (objwrite) { sink(paste(name,"obj",sep=".")) rownames(x)<-labels(dframe)[[1]] print(x) sink() } if (timer && outwrite) { otime<-proc.time() cat("Input time: ",formatC(itime-stime,digits=6,width=10,format="f"),"\n",file=outfile) cat("Compute time: ",formatC(ctime-itime,digits=6,width=10,format="f"),"\n",file=outfile) cat("Iteration time: ",formatC((ctime-itime)/itel,digits=6,width=10,format="f"),"\n",file=outfile) cat("Output time: ",formatC(otime-ctime,digits=6,width=10,format="f"),"\n",file=outfile) } if (outwrite) close(outfile) return(list(x=x,y=y,a=a,b=b,dev=devxy)) } stripSPRect<-function(target,w,ndim=2,x,y,rk,lw,up,itmax=100,eps=1e-6,verbose=FALSE) { nobj<-nrow(target); nvar<-length(rk); xy<-tcrossprod(x,y); oloss<-0; itel<-1 for (j in 1:nvar) { indi<-lw[j]:up[j] oloss<-oloss+sum(w[,j]*(target[,indi]-xy[,indi])^2) } repeat { sj<-matrix(0,nobj,ndim) for (j in 1:nvar) { indi<-lw[j]:up[j]; yj<-y[indi,] tj<-target[,indi]; sj<-sj+w[,j]*tj%*%yj } for (i in 1:nobj) { cj<-matrix(0,ndim,ndim) for (j in 1:nvar) { indi<-lw[j]:up[j]; yj<-y[indi,] cj<-cj+w[i,j]*crossprod(yj) } x[i,]<-solve(cj,drop(sj[i,])) } xy<-tcrossprod(x,y); xloss<-0 for (j in 1:nvar) { indi<-lw[j]:up[j] xloss<-xloss+sum(w[,j]*(target[,indi]-xy[,indi])^2) } for (j in 1:nvar) { indi<-lw[j]:up[j]; tj<-target[,indi]; sj<-crossprod(x,w[,j]*tj); cj<-crossprod(x,w[,j]*x) yj<-t(solve(cj,sj)) if (rk[j] < ndim) { rj<-chol(cj); zj<-lowRank(tcrossprod(yj,rj),rk[j]) yj<-t(solve(rj,t(zj))) } y[indi,]<-yj } xy<-tcrossprod(x,y); yloss<-0 for (j in 1:nvar) { indi<-lw[j]:up[j] yloss<-yloss+sum(w[,j]*(target[,indi]-xy[,indi])^2) } if (verbose) cat("Iteration: ",formatC(itel,digits=6,width=6), "Previous Loss: ",formatC(oloss,digits=6,width=12,format="f"), "Loss after X: ",formatC(xloss,digits=6,width=12,format="f"), "Loss after Y: ",formatC(yloss,digits=6,width=12,format="f"), "\n") if (((oloss-yloss) < eps) || (itel == itmax)) break() oloss<-yloss; itel<-itel+1 } q<-qr(x); x<-qr.Q(q); y<-tcrossprod(y,qr.R(q)) return(list(x=x,y=y)) } stripCA<-function(dframe,ndim=2,eps=1e-6,itmax=100,verbose=FALSE) { nobj<-nrow(dframe); nvar<-ncol(dframe); iter<-1; pops<-0; zmat<-as.matrix(ifelse(is.na(dframe),0,dframe)) rsums<-rowSums(zmat); csums<-colSums(zmat) x<-weightedGramSchmidt(cbind(orthogonalPolynomials(rsums,1:nobj,ndim)),rsums)$pol repeat { y<-crossprod(zmat,x)/csums x<-weightedGramSchmidt((zmat%*%y)/rsums,rsums)$pol cops<-sum(zmat*tcrossprod(x,y)) if (verbose) cat("Iteration: ",formatC(iter,digits=6,width=6), "Previous Gain: ",formatC(pops,digits=6,width=12,format="f"), "Current Gain: ",formatC(cops,digits=6,width=12,format="f"), "\n") if (((cops - pops) < eps) || (iter == itmax)) break() pops<-cops; iter<-iter+1 } return(list(a=rsums,b=csums,x=x,y=y)) } addBias<-function(ep,init,regr=FALSE,verbose=FALSE,eps=1E-6,itmax=100) { nobj<-nrow(ep); nvar<-ncol(ep); u<-c(rowSums(dfr),colSums(dfr)); xi<-init repeat{ gm<-outer(xi[1:nobj],xi[nobj+(1:nvar)],"+"); b<-exp(gm); lb<-b*ep; pi<-lb/(1+lb); v<-c(rowSums(pi),colSums(pi)); a<-((2*pi)-1)/gm } } rowBias<-function() { } colBias<-function() { } myOnePlot<-function(x,head,name,bias,titl,labs) { if (bias) tb<-"B" else tb<-"N" pdf(file=paste(head,"_",name,"_",tb,".pdf",sep=""),encoding="MacRoman") plot(x,type="n",xlab="dimension 1",ylab="dimension 2") title(main=paste(titl,name)) text(x,as.character(labs)) dev.off() } myMoviePlot<-function(x,y,movielab=FALSE) { plot(x,xlab="dimension 1",ylab="dimension 2",type="n") if (movielab) { text(x,as.character(1:dim(x)[1]),pch="+",col="GREEN") text(y,as.character(1:dim(x)[1]),pch="+",col="RED") } else { points(x,pch="+",col="GREEN") points(y,pch="+",col="RED") } sapply(1:dim(x)[1],function(i) lines(matrix(c(x[i,1:2],y[i,1:2]),2,2,byrow=TRUE))) } myTwoPlot<-function(x,y,head,name,bias,titl,labs1,labs2) { if (bias) tb<-"B" else tb<-"N" pdf(file=paste(head,"_",name,"_",tb,".pdf",sep=""),encoding="MacRoman") plot(rbind(x,y),type="n",xlab="dimension 1",ylab="dimension 2") title(main=paste(titl,name)) text(x,as.character(labs1),col="RED") text(y,as.character(labs2),col="GREEN") dev.off() } writeIter<-function(itel,pdev,devab,devxy,myfile="") { cat("Iteration: ",formatC(itel,digits=6,width=6), "Previous Deviance: ",formatC(pdev,digits=6,width=12,format="f"), "After a and/or b: ",formatC(devab,digits=6,width=12,format="f"), "After X and/or Y: ",formatC(devxy,digits=6,width=12,format="f"), "\n",file=myfile) } writeHead<-function(name,vname,p,a,r,c,ofile) { s<-NULL; sl<-35+10*p; for (i in 1:sl) s<-paste(s,"*",sep="") cat("\n",formatC(s,format="s"),"\n",file=ofile,sep="") cat(formatC(vname,format="s"), if (a) "(Active)," else "(Passive),", "Rank =", formatC(r,width=1),"and", "Level is", switch(c,"NO"="nominal","OR"="ordinal","NU"="numerical","PO"="polynomial"), "\n",file=ofile) cat(formatC(s,format="s"),"\n",file=ofile) } writeY<-function(g,y,t,ofile) { d<-as.vector(table(g));l<-levels(g) s<-NULL; sl<-35+10*dim(y)[2]; for (i in 1:sl) s<-paste(s,"*",sep="") u<-NULL; sl<-35+10*dim(y)[2]; for (i in 1:sl) u<-paste(u,"-",sep="") switch(t,"Z"=cat("Lower Rank Quantifications\n",file=ofile),"C"=cat("Category Centroids\n",file=ofile), "Y"=cat("Rank-restricted Category Quantifications\n",file=ofile)) cat(formatC(u,format="s"),"\n",file=ofile) for (k in 1:length(d)) cat(formatC(l[k],width=10,format="s"), formatC(d[k],width=5)," *** ", formatC(y[k,],digits=6,width=10,format="f")," *** ", formatC(sum(d[k]*y[k,]^2),digits=6,width=10,format="f"),"\n",sep="",file=ofile) cat(formatC(u,format="s"),"\n",file=ofile) cat(formatC(" ",format="s",width=10), formatC(sum(d),width=5)," *** ", formatC(d%*%(y^2),digits=6,width=10,format="f")," *** ", formatC(sum(d%*%(y^2)),digits=6,width=10,format="f"),"\n",sep="",file=ofile) cat(formatC(s,format="s"),"\n",file=ofile) } writeA<-function(a,ofile) { s<-NULL; sl<-35+10*dim(a)[2]; for (i in 1:sl) s<-paste(s,"*",sep="") u<-NULL; sl<-35+10*dim(a)[2]; for (i in 1:sl) u<-paste(u,"-",sep="") cat("Category Loadings\n",file=ofile) cat(formatC(u,format="s"),"\n",file=ofile) for (k in 1:dim(a)[1]) cat(formatC(" ",format="s",width=10),formatC(k,width=5)," *** ", formatC(a[k,],digits=6,width=10,format="f")," *** ", formatC(sum(a[k,]^2),digits=6,width=10,format="f"),"\n",sep="",file=ofile) cat(formatC(u,format="s"),"\n",file=ofile) cat(formatC(" ",format="s",width=15)," *** ", formatC(apply(a^2,2,sum),digits=6,width=10,format="f")," *** ", formatC(sum(a^2),digits=6,width=10,format="f"),"\n",sep="",file=ofile) cat(formatC(s,format="s"),"\n",file=ofile) } weightedGramSchmidt<-function(x,w) { ss<-NULL; for (j in 1:dim(x)[2]) { if (j > 1) {xx<-x[,1:(j-1)]; x[,j]<-x[,j]-xx%*%(crossprod(xx,(w*x[,j])))} s<-sqrt(sum(w*x[,j]^2)); ss<-c(ss,s); x[,j]<-x[,j]/s;} list(pol=x,fac=ss) } orthogonalPolynomials<-function(w,x,p) { z<-weightedGramSchmidt(outer(x,0:p,"^"),w)$pol[,2:(p+1)] } centerX<-function(x,w) apply(x,2,function(z) z-weighted.mean(z,w)) normX<-function(x,w) { qq<-La.svd(sqrt(w)*x); list(q=(1/sqrt(w))*(qq$u),r=qq$d) } makeEdges<-function(a,b) { c<--log(b); n<-length(b); lns<-matrix(0,0,8) for (i in 1:(n-1)) { for(j in (i+1):n) { dd<-a[i,]-a[j,]; dc<-c[i]-c[j]; ss<-sum(dd^2) if (is.nul(ss)) next() ee<-dc*dd/ss; ff<-c(-dd[2],dd[1]) xlw<--Inf; xup<-Inf for (k in (1:n)[-c(i,j)]) { dd<-a[i,]-a[k,]; dc<-c[i]-c[k] mum<-sum(dd*ff); mom<-dc-sum(dd*ee) if (is.nul(mum) & (mom > 0)) { xlw<-Inf; xup<--Inf } if (mum>0) xlw<-max(xlw,mom/mum) if (mum<0) xup<-min(xup,mom/mum) } if (xlw