library(fastICA) library(JADE) library(pixmap) library(HyperbolicDist) library(ICSNP) library(ICS) library(mvtnorm) library(survey) library(dprep) ######## Datan muunnoksia ja plottauksia ################## ellipse <- function (mu, sigma, alpha = 0.05, npoints = 250) { es <- eigen(sigma) e1 <- es$vec %*% diag(sqrt(es$val)) r1 <- sqrt(qchisq(1 - alpha, 2)) theta <- seq(0, 2 * pi, len = npoints) v1 <- cbind(r1 * cos(theta), r1 * sin(theta)) pts = t(mu - (e1 %*% t(v1))) lines(pts) invisible(pts) } # tasa-arvokäyrät contour<-function(x,alpha = 0.05) { S<-cov(x) m<-apply(x,2,mean) plot(x) ellipse(m,S,alpha) list(mean=m,cov=S) } par(mfcol=c(2,2),mar=c(2,2,2,2),cex=1) x <- rmvnorm(200,c(0,0),matrix(c(4,0.8,0.8,1),2,2)) mx <- apply(x,2,mean) S <- cov(x) contour(x,alpha=0.05) D <- diag(sqrt(diag(S))) xcent <- sweep(x,2,mx,"-") z <- xcent%*%solve(D) # skaalaus contour(z,alpha=0.05) z <- xcent%*%solve(mat.sqrt(S)) # mah-muunnos contour(z,alpha=0.05) S <- cov(x) G <- eigen(S)$vec z <- xcent%*%G # pääkomponentit contour(z,alpha=0.05) ####### Gradienttialgoritmi negentropian maksimoimiseksi: ###### neg_grad <- function(X,w.init,eps=1.0e-6,max.iter=1000,nu=rnorm(10000)) { neg <- NULL m <- apply(X,2,mean) S<-cov(X) Z <- sweep(X,2,m,"-")%*%solve(mat.sqrt(S)) # valkaisu w <- w.init w <- w/sqrt(t(w)%*%w) d <- 1 iter <- 0 while (d > eps) { wz <- Z%*%w gamma <- 2*(mean(G(wz))-mean(G(nu))) alpha <- 1 wk <- w + gamma*alpha*apply(sweep(Z,1,g(wz),"*"),2,mean) wk <- wk/sqrt(t(wk)%*%wk) d <- sqrt(t(wk-w)%*%(wk-w)) w <- wk iter <- iter+1 # talletetaan negentropian arvot kullakin iteraatiolla neg<-c(neg,(gamma/2)^2) if (iter > max.iter) break } list(w=wk%*%solve(mat.sqrt(S)),iter=iter,neg=neg) } G <- function(y) { -exp(-y^2/2) } g <- function(y) { y*exp(-y^2/2) } #################### Esimerkki ################################### S <- matrix(runif(1000,-sqrt(3),sqrt(3)),500,2) A <- matrix(c(1,1,2,5),2,2) X <- S%*%t(A) w1 <- neg_grad(X,c(1,1))$w w2 <- neg_grad(X,c(-1,1))$w W <- rbind(w1,w2) Z <- X%*%t(W) plot(S) # alkuperäiset komponentit X11() plot(Z) # erotellut komponentit plot(neg_grad(X,c(1,1))$neg,type="l") ############## Fast-fixed point algoritmi negentropian maksimoimiseksi ########## one_unit <- function(X,w.init,eps=1.0e-6,max.iter=100,nu=rnorm(10000)) { neg<-NULL m<-apply(X,2,mean) eig<-eigen(cov(X)) E<-eig$vectors D<-eig$values C<-E%*%diag(D^{-1/2})%*%t(E) Z<-sweep(X,2,m,"-")%*%t(C) w<-w.init w<-w/sqrt(t(w)%*%w) d<-1 iter<-0 while (d > eps) { wz<-Z%*%w w1<-g(wz) w2<-gd(wz) wk<-apply(sweep(Z,1,w1,"*"),2,mean)-mean(w2)*w wk<-wk/sqrt(t(wk)%*%wk) d <-sqrt(t(wk-w)%*%(wk-w)) w<-wk iter <- iter+1 negent<-mean(G(wz))-mean(G(nu)) neg<-c(neg,negent^2) if (iter > max.iter) break } list(w=wk%*%C,iter=iter,neg=neg) } ######### Esimerkki ############################ w1 <- one_unit(X,c(1,1))$w w2 <- one_unit(X,c(-1,1))$w W <- rbind(w1,w2) Z <- X%*%t(W) plot(S) # alkuperäiset komponentit X11() plot(Z) # erotellut komponentit plot(one_unit(X,c(1,1))$neg,type="l") ######### Käytetään FastICAa usean ratkaisuvektorin löytämiseksi ####### S <- matrix(runif(1000,-sqrt(3),sqrt(3)),500,2) A <- matrix(c(1,1,2,5),2,2) X <- S%*%t(A) plot(S) sev_unit <- fastICA(X, 2, alg.typ = "deflation", fun = "exp", method = "R") # Huom! fastICA:n W sisältää ortogonaaliset vektorit ja A estimoidun sekoitusmatriisin sev_unit$W sev_unit$A # Eli halutut ratkaisuvektorit saadaan matriisin A^{-1} riveiltä solve(sev_unit$A) # ja riippumattomat komponentit: plot(sev_unit$S) sev_unit <- fastICA(X, 2, alg.typ = "deflation", fun = "logcosh", method = "R") plot(sev_unit$S) ############## MEG-data esimerkki ############################### # 10 pääkomponenttimuuttujaa data<-read.table("http://users.jyu.fi/~slahola/files/ica_datoja/MEG.dat") dim(data) par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 31:40) { plot(data[,i],type="l") } # 10 riippumatonta komponenttia sev_unit <- fastICA(data, 20, alg.typ = "deflation", fun="logcosh", method = "C") par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(sev_unit$S[,i],type="l") } ###################### ICA klusteroinnissa ############ # Acute leukemia data data(pp.golub) x<-as.matrix(pp.golub) z<-x[,3572] x<-as.matrix(x[,1:3571]) data<-rbind(x,z) sev_unit <- fastICA(x, 10, alg.typ = "deflation", fun="logcosh", method = "C") pairs(sev_unit$S) plot(sev_unit$S[,1],sev_unit$S[,2]) plot(sev_unit$S[,1],sev_unit$S[,2],col=z) ################# FOBI ############ # mat.sqrt -funktio laskee matriisin neliöjuuren mat.sqrt <- function(V) { apu <- eigen(V) L <- apu$values P <- apu$vectors P%*%(diag(L))^(1/2)%*%t(P) } FOBI <- function(x) { invS1 <- solve(mat.sqrt(cov(x))) xcent <- sweep(x,2,apply(x,2,mean),"-") S2 <- cov4(xcent%*%t(invS1)) U2 <- eigen(S2)$vectors t(U2)%*%invS1 } # huipukkuusmatriisi cov4 <- function(x) { d <- dim(x)[2] n <- dim(x)[1] xm <- sweep(x,2,colMeans(x),"-") z <- xm%*%solve(mat.sqrt(cov(x))) r <- sqrt(diag(z%*%t(z))) rx <- sweep(xm,1,r,"*") (t(rx)%*%rx/dim(x)[1])/(dim(x)[2]+2) } ######### Esimerkki ############################3 # Valitaan samat marginaalijakaumat s1<-runif(1000,-sqrt(3),sqrt(3)) s2<-runif(1000,-sqrt(3),sqrt(3)) s3<-rexp(1000,1) S <- t(rbind(s1,s2,s3)) cov4(S) A <- matrix(matrix(runif(9)),3,3) X <- S%*%t(A) # Yritetään löytää A^{-1} FOBI -menetelmällä, ts. solve(A) W <- FOBI(X) W Xcent <- sweep(X,2,apply(X,2,mean),"-") Sest <- Xcent%*%t(W) diag(cov4(Sest)) d <- dim(x)[2] # huipukkuusestimaatit (d+2)*diag(cov4(Sest))-d-2 # Valitaan eri marginaalijakaumat s1<-runif(1000,-sqrt(3),sqrt(3)) s2<-rskewlap(1000,c(1/sqrt(2),1/sqrt(2),0)) S <- t(rbind(s1,s2)) A <- matrix(c(1,1,2,5),2,2) X <- S%*%t(A) W<-FOBI(X) Xcent <- sweep(X,2,apply(X,2,mean),"-") Sest <- Xcent%*%t(W) plot(Sest) diag(cov4(Sest)) ############# Robustit hajontamatriisit ############## # HuberM -funktio sivun alalaidassa Dumbgen<-function(x) { invS1<-solve(mat.sqrt(HuberM(x)$cov)) xcent <- sweep(x,2,HuberM(x)$loc,"-") S2<-duembgen.shape(x%*%t(invS1),steps=3) U2<-eigen(S2)$vectors t(U2)%*%invS1 } s1<-runif(100,-sqrt(3),sqrt(3)) s2<-rskewlap(100,c(1/sqrt(2),1/sqrt(2),0)) S <- t(rbind(s1,s2)) A <- matrix(c(1,1,2,5),2,2) X <- S%*%t(A) W<-FOBI(X) Xcent <- sweep(X,2,apply(X,2,mean),"-") Sest <- Xcent%*%t(W) plot(Sest) W<-Dumbgen(X) xcent <- sweep(X,2,HuberM(X)$loc,"-") Sest <- Xcent%*%t(W) plot(Sest) ######### Poikkeavan havainnon vaikutus #########################################33 par(mfcol=c(2,2),mar=c(2,2,2,2),cex=1) s1<-runif(200,-sqrt(3),sqrt(3)) s2<-rskewlap(200,c(1/sqrt(2),1/sqrt(2),0)) S <- t(rbind(s1,s2)) A <- matrix(c(1,1,2,5),2,2) X <- S%*%t(A) W<-FOBI(X) W Xcent <- sweep(X,2,apply(X,2,mean),"-") Sest <- Xcent%*%t(W) plot(Sest,main="FOBI") W<-Dumbgen(X) W xcent <- sweep(X,2,HuberM(X)$loc,"-") Sest <- Xcent%*%t(W) plot(Sest,main="Robusti") X[200,]<-c(-5,0) W<-FOBI(X) W Xcent <- sweep(X,2,apply(X,2,mean),"-") Sest <- Xcent%*%t(W) plot(Sest,main="FOBI - kontaminoitu aineisto") W<-Dumbgen(X) W xcent <- sweep(X,2,HuberM(X)$loc,"-") Sest <- Xcent%*%t(W) plot(Sest,main="Robusti - kontaminoitu aineisto") #############33#### kuvankäsittely #################33########## cov4b <- function(x) { d <- dim(x)[2] n <- dim(x)[1] xm <- sweep(x,2,colMeans(x),"-") z <- xm%*%solve(mat.sqrt(cov(x))) r<-NULL for (i in 1:n) { r<-c(r,sqrt(t(z[i,])%*%z[i,])) } rx <- sweep(xm,1,r,"*") (t(rx)%*%rx/dim(x)[1])/(dim(x)[2]+2) } FOBIb <- function(x) # 4th mom { invS1 <- solve(mat.sqrt(cov(x))) xcent <- sweep(x,2,apply(x,2,mean),"-") S2 <- cov4b(xcent%*%t(invS1)) U2 <- eigen(S2)$vectors t(U2)%*%invS1 } fig1 <- read.pnm(system.file("pictures/cat.pgm", package="ICS")[1]) fig2 <- read.pnm(system.file("pictures/road.pgm", package="ICS")[1]) fig3 <- read.pnm(system.file("pictures/sheep.pgm", package="ICS")[1]) s<-cbind(as.vector(fig1@grey),as.vector(fig2@grey),as.vector(fig3@grey)) A<-matrix(runif(9),3,3) x<-s%*%t(A) x1<-matrix(x[,1],130,130) x2<-matrix(x[,2],130,130) x3<-matrix(x[,3],130,130) par(mfrow = c(1,3),mar=c(1,1,1,1)) plot(pixmapGrey(x1)) plot(pixmapGrey(x2)) plot(pixmapGrey(x3)) W<-FOBIb(x) Xcent <- sweep(x,2,apply(x,2,mean),"-") Sest <- Xcent%*%t(W) Sest1<-(matrix(Sest[,1],130,130)) Sest2<-(matrix(Sest[,2],130,130)) Sest3<-(matrix(Sest[,3],130,130)) par(mfrow = c(1,3),mar=c(1,1,1,1)) plot(pixmapGrey(Sest1)) plot(pixmapGrey(Sest2)) plot(pixmapGrey(Sest3)) ############# Menetelmien vertailu: ############################# ISR<-function(W,A) { d<-dim(W)[2] G<-W%*%A delta<-abs(sweep(G,1,apply(abs(G),1,max),"/")) sqrt((1/d)*sum(delta^2)-1) } ########## Simulation study - ISR ########################## simul<-function(n,A,rep) { ISR1<-NULL ISR2<-NULL ISR3<-NULL ISR4<-NULL for (i in 1:rep) { d<-dim(x)[2] s<-cbind(runif(n,-sqrt(1),sqrt(1)),rskewlap(n,c(1/sqrt(2),1/sqrt(2),0)),rnorm(n)) x<-s%*%t(A) W1<-fastICA(x,d,alg.typ="parallel",fun="logcosh") W1<-t(solve(W1$A)) W2<-fastICA(x,d,alg.typ="parallel",fun="exp") W2<-t(solve(W2$A)) W3<-FOBI(x) ISR1<-c(ISR1,ISR(W1,A)) ISR2<-c(ISR2,ISR(W2,A)) ISR3<-c(ISR3,ISR(W3,A)) } list(ISR1=ISR1,ISR2=ISR2,ISR3=ISR3) } A <- matrix(runif(9),3,3) result <- simul(500,A,100) boxplot(data.frame(result)) ########### otoskoon vaikutus ####################### n<-c(20,50,100,200,500,1000) A <- matrix(runif(9),3,3) IS1<-NULL IS2<-NULL IS3<-NULL IS4<-NULL for (j in 1:6) { A <- matrix(runif(9),3,3) result<-simul(n[j],A,100) IS1<-c(IS1,mean(result$ISR1)) IS2<-c(IS2,mean(result$ISR2)) IS3<-c(IS3,mean(result$ISR3)) } plot(n,IS1,type="l",ylim=c(0,0.45),ylab="mean(ISR)") lines(n,IS2,lwd=2) lines(n,IS3,lty=2) legend(500,0.38, c("FastICA -logcosh", "FastICA2 -exp", "FOBI"), lty = c(1,1,2,2), lwd=c(1,2,1,2),merge = TRUE,bty="n") ############ Simulation study - Amari error ###################### PI<-function(W,A) { d<-dim(W)[1] r<-sqrt(diag((W)%*%t(W))) W<-sweep(W,2,r,"/") G<-W%*%A delta<-abs(sweep(G,1,apply(abs(G),1,max),"/")) rho<-abs(sweep(G,2,apply(abs(G),2,max),"/")) (sum(delta)+sum(rho)-2*d)/(2*d*(d-1)) } simul<-function(n,A,rep) { PI1<-NULL PI2<-NULL PI3<-NULL PI4<-NULL for (i in 1:rep) { s<-cbind(rnorm(n),rexp(n),runif(n,-sqrt(3),sqrt(3))) x<-s%*%t(A) d<-dim(x)[2] W1<-fastICA(x,d,alg.typ="parallel",fun="logcosh") W1<-t(solve(W1$A)) W2<-fastICA(x,d,alg.typ="parallel",fun="exp") W2<-t(solve(W2$A)) W3<-FOBI(x) PI1<-c(PI1,PI(W1,A)) PI2<-c(PI2,PI(W2,A)) PI3<-c(PI3,PI(W3,A)) } list(PI1=PI1,PI2=PI2,PI3=PI3) } A <- matrix(runif(9),3,3) result <- simul(n=500,A,rep=500) boxplot(data.frame(result),xlab="") ############## simulation study - SIR ################################## simul<-function(n,A,rep) { SIR1<-NULL SIR2<-NULL SIR3<-NULL SIR4<-NULL for (i in 1:rep) { S<-cbind(rnorm(n),rexp(n),runif(n,-sqrt(3),sqrt(3))) X<-S%*%t(A) d<-dim(X)[2] S1<-fastICA(X,d,alg.typ="parallel",fun="logcosh")$S S2<-fastICA(X,d,alg.typ="parallel",fun="exp")$S S3<-X%*%t(FOBI(X)) SIR1<-c(SIR1,SIR(S1,S)) SIR2<-c(SIR2,SIR(S2,S)) SIR3<-c(SIR3,SIR(S3,S)) } list(SIR1=SIR1,SIR2=SIR2,SIR3=SIR3) } A <- matrix(runif(9),3,3) result <- simul(n=500,A,rep=500) boxplot(data.frame(result),xlab="") ########### otoskoon vaikutus ##################################3 n<-c(50,100,200,500,1000,2000) A <- matrix(runif(9),3,3) SI1<-NULL SI2<-NULL SI3<-NULL SI4<-NULL for (j in 1:6) { A <- matrix(runif(9),3,3) result<-simul(n[j],A,100) SI1<-c(SI1,mean(result$SIR1)) SI2<-c(SI2,mean(result$SIR2)) SI3<-c(SI3,mean(result$SIR3)) } plot(n,SI1,type="l",ylab="mean(SIR)") lines(n,SI2,lwd=2) lines(n,SI3,lty=2) legend(1000,15, c("FastICA -logcosh", "FastICA2 -exp", "FOBI"), lty = c(1,1,2), lwd=c(1,2,1),merge = TRUE,bty="n") ############# graafiset tarkastelut ################################3 n<-500 S <- cbind(rnorm(n),rexp(n),runif(n,-sqrt(3),sqrt(3))) pairs(S) A<-matrix(runif(9),3,3) X <- S%*%t(A) W <- FOBI(X) Sest <- X%*%t(W) X11() pairs(Sest) # järjestetään rivit ja merkit s.e ||LPG-I|| mahdollisimman pieni. # Lisäksi skaalataan komponentit: G <- W%*%A G # etsitään permutaatiomatriisi: maksim <- apply(abs(G),1,max) P <- t((abs(G)==maksim)*1) P P%*%G # korjataan merkit L <- diag(sign(diag(P%*%G))) L Gnew <- L%*%P%*%G Gnew # permutoidaan ja skaalataan estimoidut komponentit: Snew <- Sest%*%t(L%*%P) Snew <- sweep(Snew,2,sqrt(apply(Snew,2,var)),"/") apply(Snew,2,var) pairs(Sest) par(mfrow = c(3,1)) plot(Snew[,1],S[,1]) abline(0,1) plot(Snew[,2],S[,2]) abline(0,1) plot(Snew[,3],S[,3]) abline(0,1) ############## aikasarja-tyyppiset kuvat ########################## data<-read.table("http://users.jyu.fi/~slahola/files/ica_datoja/speech.dat") s<-as.matrix(data) par(mfrow = c(4, 1)) plot(s[,1],type="l",ylab="s1") plot(s[,2],type="l",ylab="s2") plot(s[,3],type="l",ylab="s3") plot(s[,4],type="l",ylab="s4") # sekoitetaan puhesignaalit A <- matrix(runif(16),4,4) x <- s%*%t(A) par(mfrow = c(4, 1)) plot(x[,1],type="l") plot(x[,2],type="l") plot(x[,3],type="l") plot(x[,4],type="l") # erotetut komponentit FOBI-menetelmällä X11() W <- FOBI(x) s <- sweep(x,2,colMeans(x),"-")%*%t(W) par(mfrow = c(4, 1)) plot(s[,1],type="l",ylab="s1") plot(s[,2],type="l",ylab="s2") plot(s[,3],type="l",ylab="s3") plot(s[,4],type="l",ylab="s4") ############ Laskenta-aikojen vertailu ######### simul<-function(n,A,rep) { T1<-NULL T2<-NULL d<-dim(A)[1] for (i in 1:rep) { S<-matrix(runif(n*d),n,d) X<-S%*%t(A) ptm<-proc.time() res<-fastICA(X,d,alg.typ="parallel",fun="logcosh") time1<-(proc.time()-ptm)[3] ptm<-proc.time() res<-FOBI(X) time2<-(proc.time()-ptm)[3] T1<-c(T1,time1) T2<-c(T2,time2) } list(T1=T1,T2=T2) } A <- matrix(runif(9),3,3) result <- simul(n=1000,A,rep=50) n<-c(100,200,500,1000,2000) Time1<-NULL Time2<-NULL for (j in 1:5) { A <- matrix(runif(9),3,3) result<-simul(n[j],A,50) Time1<-c(Time1,median(result$T1)) Time2<-c(Time2,median(result$T2)) } plot(n,Time1,type="l",ylab="mean(Time)",ylim=c(0,0.1)) lines(n,Time2,lwd=2) legend(100,0.1, c("FastICA -logcosh","FOBI"), lty = c(1,1), lwd=c(1,2), merge = TRUE,bty="n") ######## Robustisuus - vertailuindeksit poikkeavien havaintojen tapauksessa ############### Dumbgen<-function(x) { invS1<-solve(mat.sqrt(huberM(x)$cov)) xcent <- sweep(x,2,HuberM(x)$loc,"-") S2<-tdumbgen.shape(x%*%t(invS1),steps=3) U2<-eigen(S2)$vectors t(U2)%*%invS1 } simul<-function(n,A,rep) { ISR1<-NULL ISR2<-NULL ISR3<-NULL ISR4<-NULL for (i in 1:rep) { d<-dim(A)[2] s<-cbind(runif(n,-sqrt(1),sqrt(1)),rskewlap(n,c(1/sqrt(2),1/sqrt(2),0)),rnorm(n)) x<-s%*%t(A) # add outliers r<-sqrt(diag(x%*%t(x))) index<-d-rank(r)+1 x<-x[order(index),] x[1:(0.01*n),]<-matrix(sign(runif(d*0.01*n,-1,1))*runif(d*0.01*n,1,6),0.01*n,d) W1<-fastICA(x,d,alg.typ="parallel",fun="logcosh") W1<-t(solve(W1$A)) W2<-fastICA(x,d,alg.typ="parallel",fun="exp") W2<-t(solve(W2$A)) W3<-FOBI(x) W4<-Dumbgen(x) ISR1<-c(ISR1,ISR(W1,A)) ISR2<-c(ISR2,ISR(W2,A)) ISR3<-c(ISR3,ISR(W3,A)) ISR4<-c(ISR4,ISR(W4,A)) } list(ISR1=ISR1,ISR2=ISR2,ISR3=ISR3,ISR4=ISR4) } A <- matrix(runif(9),3,3) result <- simul(500,A,200) boxplot(data.frame(result)) ######### Vertailuindeksit paksuhäntäisten jakaumien tapauksessa ########## simul<-function(n,A,rep) { ISR1<-NULL ISR2<-NULL ISR3<-NULL ISR4<-NULL for (i in 1:rep) { d<-dim(x)[2] s<-cbind(rt(n,1),rt(n,2),rt(n,5)) x<-s%*%t(A) W1<-fastICA(x,d,alg.typ="parallel",fun="logcosh") W1<-t(solve(W1$A)) W2<-fastICA(x,d,alg.typ="parallel",fun="exp") W2<-t(solve(W2$A)) W3<-FOBI(x) # W4<-Dumbgen(x) ISR1<-c(ISR1,ISR(W1,A)) ISR2<-c(ISR2,ISR(W2,A)) ISR3<-c(ISR3,ISR(W3,A)) # ISR4<-c(ISR4,ISR(W4,A)) } list(ISR1=ISR1,ISR2=ISR2,ISR3=ISR3) } A <- matrix(runif(9),3,3) result <- simul(500,A,200) boxplot(data.frame(result)) ########## Kohinaa sisältävät mallit ######### ###### keinotekoisten signaalien erotus fastICAlla ######### x<-seq(1,2000,0.5) x<-x[1:2000] y<-sin(x/15) y2<-tan(x/30)/300 y3<-rweibull(2000,2,1) s<-(max(y3)-min(y3))/2+min(y3) y3<-y3-s y3<-y3/max(y3) y4<-NULL for(i in 1:length(x)) { seq<-100 if (x[i]>=0 & x[i]=seq & x[i]<2*seq) {y4[i]<-x[i]-200} if (x[i]>=2*seq & x[i]<3*seq) {y4[i]<--x[i]+200} if (x[i]>=3*seq & x[i]<4*seq) {y4[i]<-x[i]-400} if (x[i]>=4*seq & x[i]<5*seq) {y4[i]<--x[i]+400} if (x[i]>=5*seq & x[i]<6*seq) {y4[i]<-+x[i]-600} if (x[i]>=6*seq & x[i]<7*seq) {y4[i]<--x[i]+600} if (x[i]>=7*seq & x[i]<8*seq) {y4[i]<-+x[i]-800} if (x[i]>=8*seq & x[i]<9*seq) {y4[i]<--x[i]+800} if (x[i]>=9*seq & x[i]<10*seq) {y4[i]<-+x[i]-1000} } y4<-(y4+50)/50 y4<-c(y4,y4[1998],y4[1998]) a<-3 T<-0.1 t <-seq(0,1,0.0005) t<-t[1:2000] mod<-function(x,m) { t1<-floor(x/m) return(x-t1*m) } y2<-a*mod(t,T) s<-t(rbind(y,y2,y4)) par(mfrow = c(3, 1)) plot(s[,1],type="l",ylab="s1") plot(s[,2],type="l",ylab="s2") plot(s[,3],type="l",ylab="s3") A <- matrix(runif(3*100),100,3) X <- s%*%t(A) par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(X[,i],type="l") } # Lisätään valkoista kohonaa X <- s%*%t(A)+rmvnorm(2000,rep(0,100),0.1*diag(100)) par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(X[,i],type="l") } # Erotellaan tavallisella fastICAlla sev_unit <- fastICA(X, 100, fun = "exp", method = "C") par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(sev_unit$S[,i],type="l") } ########## Kohinamallin esikäsittelyä ############ ################ PCA ####################3 plot(eigen(cov(X))$values,type="l") sev_unit <- fastICA(X,3, fun = "exp", method = "C") X11() par(mfrow = c(3, 1),mar=c(1,1,1,1)) for (i in 1:3) { plot(sev_unit$S[,i],type="l") } sev_unit <- fastICA(X,20, fun = "exp", method = "C") X11() par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(sev_unit$S[,i],type="l") } ########### low-pass filtering #############3 par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(X[,i],type="l") } Xs<-filter(X, rep(1/10, 10), sides = 1, circular = TRUE) par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(Xs[,i],type="l") } sev_unit <- fastICA(Xs, 100, fun = "exp", method = "C") par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(sev_unit$S[,i],type="l") } ######### low-pass filtering + PCA ############### sev_unit <- fastICA(Xs, 3, fun = "exp", method = "C") par(mfrow = c(3, 1),mar=c(1,1,1,1)) for (i in 1:3) { plot(sev_unit$S[,i],type="l") } ###### fastICA for noisy data ######### one_unit <- function(X,w.init,eps=1.0e-6,max.iter=100,sigma) { dx<-dim(X)[2] m<-apply(X,2,mean) C<-solve(mat.sqrt(cov(X)-sigma*diag(dx))) Z<-sweep(X,2,m,"-")%*%t(C) w<-w.init w<-w/sqrt(t(w)%*%w) d<-1 iter<-0 while (d > eps) { wz<-Z%*%w w1<-g(wz) w2<-gd(wz) S<-C%*%cov(X)%*%C wk<-c(apply(sweep(Z,1,w1,"*"),2,mean)-mean(w2)*w%*%(diag(dx)+S)) wk<-wk/sqrt(t(wk)%*%wk) d <-sqrt(t(wk-w)%*%(wk-w)) w<-wk iter <- iter+1 if (iter > max.iter) break } list(w=wk,iter=iter) } g<-function(y) { y*exp(-y^2/2) } gd<-function(y) { exp(-y^2/2)*(1-y^2) } #### artificial data as above ####### par(mfrow = c(3, 1)) plot(s[,1],type="l",ylab="s1") plot(s[,2],type="l",ylab="s2") plot(s[,3],type="l",ylab="s3") A <- matrix(runif(3*3),3,3) X <- s%*%t(A) # add random noise X <- s%*%t(A)+rmvnorm(2000,rep(0,3),0.25*diag(3)) par(mfrow = c(3, 1),mar=c(1,1,1,1)) for (i in 1:3) { plot(X[,i],type="l") } w<-one_unit(X,c(1,1,1),sigma=0.1)$w dev.off() s<-X%*%w plot(s,type="l") ############ EFOBI signaali-kohina-mallille ############################## EFOBI<-function(X,k) { # orthogonalization: d<-dim(X)[2] n<-dim(X)[1] eig<-eigen(cov(X)) sigma<-mean(eig$values[(k+1):d]) eigval<-eig$values[1:k] Gamma<-eig$vectors[,1:k] T<-diag(1/sqrt(eigval-sigma))%*%t(Gamma) Z<-X%*%t(T) # 4th moments: S2<-(t(Z)%*%Z)%*%(t(Z)%*%Z)/n D<-eigval-sigma apu<-(D+sigma)*cumsum(1/D)+2*sigma/D delta<-((k+4)*sigma/D)+(sigma/D)*apu S2d<-S2-diag(delta) # final rotation V<-eigen(S2d)$vectors A<-t(T)%*%V S<-Z%*%V list(S=S,A=A,kurt=eigen(S2d)$values) } ###### Esimerkki ##################3 s<-t(rbind(y,y2,y4)) par(mfrow = c(3, 1)) plot(s[,1],type="l",ylab="s1") plot(s[,2],type="l",ylab="s2") plot(s[,3],type="l",ylab="s3") A <- matrix(runif(3*100),100,3) X <- s%*%t(A) par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(X[,i],type="l") } # add random noise X <- s%*%t(A)+rmvnorm(2000,rep(0,100),0.1*diag(100)) par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(X[,i],type="l") } S<-EFOBI(X,3)$S par(mfrow = c(3, 1),mar=c(1,1,1,1)) for (i in 1:3) { plot(S[,i],type="l") } EFOBI(X,3)$kurt ######### MEG-data esimerkki ############################### data<-read.table("http://users.jyu.fi/~slahola/files/ica_datoja/MEG.dat") x<-as.matrix(data) par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 11:20) { plot(x[,i],type="l") } dev.off() plot(eigen(cov(x))$values) S<-EFOBI(x,7)$S par(mfrow = c(7, 1),mar=c(1,1,1,1)) for (i in 1:7) { plot(S[,i],type="l") } W<-FOBI(x) Xcent <- sweep(x,2,apply(x,2,mean),"-") Sest <- Xcent%*%t(W) X11() par(mfrow = c(10, 1),mar=c(1,1,1,1)) for (i in 1:10) { plot(Sest[,i],type="l") } ########## AMUSE aikariippuville muuttujille ######### AMUSE <- function(x) { n<-dim(x)[1] # valkaisu invS1<-solve(mat.sqrt(cov(x))) z<-sweep(x,2,colMeans(x),"-")%*%invS1 # Autokovarianssi S<-t(z[1:(n-1),])%*%z[2:n,]/n Stau<-(1/2)*(S+t(S)) t(eigen(Stau)$vectors)%*%invS1 } ########## Simulointi AR(1) prosesseilla ############### ISR<-function(W,A) { d<-dim(W)[2] G<-W%*%A delta<-abs(sweep(G,1,apply(abs(G),1,max),"/")) sqrt((1/d)*sum(delta^2)-1) } simul<-function(n,A,rep) { ISR1<-NULL ISR2<-NULL ISR3<-NULL ISR4<-NULL for (i in 1:rep) { d<-dim(A)[2] s1<-arima.sim(list(order=c(1,0,0), ar=0.1), n) s2<-arima.sim(list(order=c(1,0,0), ar=0.3), n) s3<-arima.sim(list(order=c(1,0,0), ar=0.5), n) s4<-arima.sim(list(order=c(1,0,0), ar=0.7), n) s<-t(rbind(s1,s2,s3,s4)) x<-s%*%t(A) W1<-fastICA(x,d,alg.typ="parallel",fun="logcosh") W1<-t(solve(W1$A)) W2<-FOBI(x) W3<-AMUSE(x) ISR1<-c(ISR1,ISR(W1,A)) ISR2<-c(ISR2,ISR(W2,A)) ISR3<-c(ISR3,ISR(W3,A)) } list(ISR1=ISR1,ISR2=ISR2,ISR3=ISR3) } A <- matrix(runif(16),4,4) result <- simul(500,A,100) dev.off() boxplot(data.frame(result)) # huberM -funktio laskee robustin kovarianssimatriisiestimaatin (M-estimaatin) HuberM<-function(data) { n <- dim(data)[1] k <- dim(data)[2] q <- pchisq(k+1,k) c <- qchisq(q,k) c2 <- pchisq(c,k+2)+(c/k)*(1-q) bdp <- min(1/c,1-k/c) MAX_ITER <- 250; # Max number of iteration EPS <- 1.0e-6; # Iteration accuracy # STARTING VALUES t <- apply(data,2,mean) C <- cov(data) R<-chol(solve(C)) iter <- 0 d1<-1 d2<-1 while (d1 > EPS & d2 > EPS ) { # SCATTER STEP u<-NULL data2 <- sweep(data,2,t,"-") s <- diag(data2%*%(t(R)%*%R)%*%t(data2)) u[s<=c] <- (1/c2) u[s>c] <- (s[s>c]/(c/c2))^{-1} C <- R%*%(t(data2)%*%sweep(data2,1,u,"*")/n)%*%t(R) R0 <- chol(solve(C)) R <- R0%*%R d1 = max(apply(abs(R0-diag(k)),1,sum)) # LOCATION STEP v <- NULL s <- diag(data2%*%(t(R)%*%R)%*%t(data2)) v[s<=c] <- 1 v[s>c] <- sqrt(c)/sqrt(s[s>c]) h <- apply(sweep(data2,1,v,"*"),2,mean)/mean(v); t <- t + h; d2 <-sqrt(t(h-t)%*%(h-t)) iter <- iter+1 if (iter > MAX_ITER) break } C <- t(R)%*%R C <- solve(C) list (loc=t,cov=C,iter=iter) }