# # logithom package # Copyright (C) 2006 Jan de Leeuw # UCLA Department of Statistics, Box 951554, Los Angeles, CA 90095-1554 # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ################################################################### # # version 1.0.0, 2006-05-05, split off from scalassoc # version 1.1.0, 2006-05-06, ad-hoc homals can now do princals # version 1.2.0, 2006-05-07, special purpose matrix approximation program # version 1.3.0, 2006-05-07, logitHom can now do single nominal # version 1.4.0, 2006-05-12, now has timer and outfile like homals # logitHom<-function( dframe, # data frame ndim=2, # dimensionality outeps=1e-4, inseps=1e-4, inbeps=1e-4, bias=TRUE, rk=rep(ndim,ncol(dframe)), outmax=500, insmax=1, inbmax=1, offset=1.20, rowplot=FALSE, colplot=FALSE, voroplot=FALSE, vorolab=FALSE, movieplot=FALSE, movielab=FALSE, moviequick=FALSE, ffmpeg=moviequick, jointplot=FALSE, verbose=TRUE, outwrite=TRUE, objwrite=FALSE, timer=FALSE) { if (timer) stime<-proc.time() name<-deparse(substitute(dframe)); nvar<-dim(dframe)[2] parts<-sapply(1:nvar,function(j) length(table(dframe[,j]))) mis<-ifelse(is.na(dframe),0,1); indi<-as.matrix(expandFrame(dframe)) up<-cumsum(parts); lw<-1+c(0,up[-nvar]) nrow<-dim(indi)[1]; ncol<-dim(indi)[2]; itel<-1 rd<-matrix(0,nrow,nvar); cd<-rep(0,ncol) b<-rep(1,ncol); a<-mis; ab<-matrix(0,nrow,ncol) for (j in 1:nvar) { idi<-lw[j]:up[j]; dmi<-indi[,idi]; cd[idi]<-colSums(dmi) ab[,idi]<-outer(a[,j],b[idi]) } dmin<-2*sum(indi) z<-stripHomals(dframe,ndim,rk) x<-z$x; y<-z$y phi<-tcrossprod(x,y); ep<-exp(phi); lb<-ab*ep; pdev<-(2*(sum(indi*lb)-sum(xlogy(indi,lb))))-dmin pterm<-sum(sapply(1:nvar,function(j) sum(mis[,j]*lb[,lw[j]:up[j]]))) pdev<-(2*(pterm-sum(xlogy(indi,lb))))-dmin if (moviequick) system(paste("mkdir ",name,"_movie",sep="")) if (outwrite) outfile<-file(paste(name,"out",sep="."),"w") if (timer) itime<-proc.time() if (outwrite) cat("Iterations:\n",file=outfile) repeat { for (j in 1:nvar) { idi<-lw[j]:up[j]; emi<-ep[,idi] if (bias) b[idi]<-cd[idi]/as.vector(a[,j]%*%emi) a[,j]<-mis[,j]/as.vector(emi%*%b[idi]) ab[,idi]<-outer(a[,j],b[idi]) } lb<-ab*ep pterm<-sum(sapply(1:nvar,function(j) sum(mis[,j]*lb[,lw[j]:up[j]]))) devab<-(2*(pterm-sum(xlogy(indi,lb))))-dmin targ<-phi+2*(indi-lb) uu<-stripRect(targ,mis,ndim,x,y,rk,lw,up) if (movieplot) myMoviePlot(x,uu$x,movielab) if (moviequick) { setwd(paste(name,"_movie",sep="")) jpeg(paste("file",as.character(itel),".jpg",sep="")) myMoviePlot(x,uu$x,movielab) dev.off() setwd("..") } x<-uu$x; y<-uu$y; phi<-tcrossprod(x,y); ep<-exp(phi); lb<-ab*ep pterm<-sum(sapply(1:nvar,function(j) sum(mis[,j]*lb[,lw[j]:up[j]]))) devxy<-(2*(pterm-sum(xlogy(indi,lb))))-dmin if (verbose) writeIter(itel,pdev,devab,devxy) if (outwrite) writeIter(itel,pdev,devab,devxy,myfile=outfile) if ((itel == outmax) || ((pdev-devxy) < outeps)) break() itel<-itel+1; pdev<-devxy } if (timer) ctime<-proc.time() if (ffmpeg) { setwd(paste(name,"_movie",sep="")) system(paste("ffmpeg -i file%d.jpg ",name,".mov",sep="")) system("mv *.mov ..") system("rm *.jpg") setwd("..") system(paste("rm -rf ",name,"_movie",sep="")) } ez<-eigen(crossprod(y)); ezval<-sqrt(sqrt(ez$values)); ezvec<-ez$vectors x<-sqrt(sqrt(nrow/ncol))*x%*%ezvec%*%diag(ezval); y<-sqrt(sqrt(ncol/nrow))*y%*%ezvec%*%diag(1/ezval) if (rowplot) myOnePlot(x,"rowplot",name,bias,"Row Objects",labels(indi)[[1]]) if (colplot) myOnePlot(y,"colplot",name,bias,"Column Objects",labels(indi)[[2]]) if (jointplot) myTwoPlot(x,y,"jointplot",name,bias, "Column Objects (Red) Row Objects (Green)",labels(indi)[[1]],labels(indi)[[2]]) if (voroplot) { pdf(file=paste("voronoi_",name,".pdf",sep=""),encoding="MacRoman") mm1<-max(rbind(x,y)); mm2<-min(rbind(x,y)); mm<-c(mm2,mm1); lb<-labels(indi)[[2]] for (j in 1:nvar) { idi<-lw[j]:up[j]; yj<-y[idi,]; z<-rbind(x,yj); vname<-names(dframe)[j] xx<-c(min(z[,1]),max(z[,1])); yy<-c(min(z[,2]),max(z[,2])) plot(x,type="n",main=paste("Voronoi plot for",name,":",vname), xlab="dimension 1",ylab="dimension 2", xlim=offset*xx,ylim=offset*yy) drawEdges(yj,b[idi],far=1000) if (vorolab) text(x,as.character(dframe[,j])) text(yj,lb[idi],col="red") } dev.off() } #if (outwrite) { # writeHead(name,vname[j],ndim,active[j],rank[j],level[j],outfile) # writeY(data[,j],y,"Y",outfile) # writeY(data[,j],c,"C",outfile) # writeY(data[,j],u,"Z",outfile) # writeA(t(a),outfile) # } if (objwrite) { sink(paste(name,"obj",sep=".")) rownames(x)<-labels(dframe)[[1]] print(x) sink() } if (timer && outwrite) { otime<-proc.time() cat("Input time: ",formatC(itime-stime,digits=6,width=10,format="f"),"\n",file=outfile) cat("Compute time: ",formatC(ctime-itime,digits=6,width=10,format="f"),"\n",file=outfile) cat("Iteration time: ",formatC((ctime-itime)/itel,digits=6,width=10,format="f"),"\n",file=outfile) cat("Output time: ",formatC(otime-ctime,digits=6,width=10,format="f"),"\n",file=outfile) } if (outwrite) close(outfile) return(list(x=x,y=y,a=a,b=b,dev=devxy)) } expandFrame<-function(tab,clean=TRUE,zero=TRUE,returnFrame=TRUE) { n<-dim(tab)[1]; m<-dim(tab)[2]; g<-matrix(0,n,0); l<-rep("",0) lab1<-labels(tab)[[1]]; lab2<-labels(tab)[[2]] for (j in 1:m) { y<-as.factor(tab[,j]); h<-levels(y) g<-cbind(g,ifelse(outer(y,h,"=="),1,0)) l<-c(l,paste(lab2[j],"_",h,sep="")) } if (zero) g<-ifelse(is.na(g),0,as.matrix(g)) if (clean) { g<-g[which(rowSums(g)>0),which(colSums(g)>0)] g<-g[,which(colSums(g) 1) {xx<-x[,1:(j-1)]; x[,j]<-x[,j]-xx%*%(crossprod(xx,(w*x[,j])))} s<-sqrt(sum(w*x[,j]^2)); ss<-c(ss,s); x[,j]<-x[,j]/s;} list(pol=x,fac=ss) } orthogonalPolynomials<-function(w,x,p) { z<-weightedGramSchmidt(outer(x,0:p,"^"),w)$pol[,2:(p+1)] } centerX<-function(x,w) apply(x,2,function(z) z-weighted.mean(z,w)) normX<-function(x,w) { qq<-La.svd(sqrt(w)*x); list(q=(1/sqrt(w))*(qq$u),r=qq$d) } makeEdges<-function(a,b) { c<--log(b); n<-length(b); lns<-matrix(0,0,8) for (i in 1:(n-1)) { for(j in (i+1):n) { dd<-a[i,]-a[j,]; dc<-c[i]-c[j]; ss<-sum(dd^2) if (is.nul(ss)) next() ee<-dc*dd/ss; ff<-c(-dd[2],dd[1]) xlw<--Inf; xup<-Inf for (k in (1:n)[-c(i,j)]) { dd<-a[i,]-a[k,]; dc<-c[i]-c[k] mum<-sum(dd*ff); mom<-dc-sum(dd*ee) if (is.nul(mum) & (mom > 0)) { xlw<-Inf; xup<--Inf } if (mum>0) xlw<-max(xlw,mom/mum) if (mum<0) xup<-min(xup,mom/mum) } if (xlw