バイオインフォマティクス特論12

(1)

バイオインフォマティクス特論

3 ケーススタディ

3-1. 記憶の保持 (2)

(2)

事後予測分布

パラメータの事後分布に従って、モデルがどんなデータを期待す

るかを予測する。

予測分布が観測されたデータと⼀致するかを確認することで、モ

デルの適切さを確認できる。

前回と同じ問題で事後予測を⾏う。

(3)

3-1. 記憶の保持

3-1-1. 個⼈差を考えない場合

3-1-2. 完全な個⼈差を考える場合

(4)

3-1-1. 個人差を考えない場合

「ベイズ統計で実践モデリング」

第10章記憶の保持

10.1 個⼈差を考えない場合

(5)

データと問題

記憶の保持の指数的減衰モデル

時間

t

経過した後にある項⽬を想起する確率θ

_t

= exp (-α

t

) +β

αは記憶の減衰率

βは⾮常に⻑い時間たっても残っているベースラインの想起レベル

４⼈の⼈に対して、10の時点(1,2,4,7,12,21,35,59,99,200)において、

18項⽬についてテストした結果

αとβの推定

(6)

4名の実験参加者と10の時点の記憶保持データ

実験参加者 1 2 4 7 12 21 35 59 99 200 1 18 18 16 13 9 6 4 4 4 ? 2 17 13 9 6 4 4 4 4 4 ? 3 14 10 6 4 4 4 4 4 4 ? 4 ? ? ? ? ? ? ? ? ? ? 時点

(7)

問題をグラフィカルモデルで表現

θ

_j

k

_ij

n

β

α

t

_j

j回⽬ i番⽬の⼈

α ~ Beta(1,1)

β ~ Beta(1,1)

θ

_j

<- min(1, exp(-α

t

)+β

k

_ij

~ Binomial(θ

_j

,

n

)

(8)

# Retention With No Individual Differences model{

# Observed and Predicted Data for (i in 1:ns){ for (j in 1:nt){ k[i,j] ~ dbin(theta[i,j],n) predk[i,j] ~ dbin(theta[i,j],n) } }

# Retention Rate At Each Lag For Each Subject Decays Exponentially for (i in 1:ns){ for (j in 1:nt){ theta[i,j] <- min(1,exp(-alpha*t[j])+beta) } } # Priors alpha ~ dbeta(1,1) beta ~ dbeta(1,1) }

Jagsでのモデルの記述

Retention_1.txt

(9)

R2JagsによるMCMCサンプリング

# clears workspace: rm(list=ls())

# sets working directories:

setwd("C:/Users/EJ/Dropbox/EJ/temp/BayesBook/test/CaseStudies/MemoryRetention") library(R2jags) t <- c(1, 2, 4, 7, 12, 21, 35, 59, 99, 200) nt <- length(t) slist <- 1:4 ns <- length(slist) k <- matrix(c(18, 18, 16, 13, 9, 6, 4, 4, 4, NA, 17, 13, 9, 6, 4, 4, 4, 4, 4, NA, 14, 10, 6, 4, 4, 4, 4, 4, 4, NA,

NA, NA, NA, NA,NA,NA,NA,NA,NA, NA), nrow=ns, ncol=nt, byrow=T) k

n <- 18

(10)

myinits <- list(

list(alpha = 0.5, beta = 0.1)) # parameters to be monitored:

parameters <- c("alpha", "beta", "predk")

# The following command calls JAGS with specific options. # For a detailed description see the R2jags documentation. samples <- jags(data, inits=myinits, parameters,

model.file ="Retention_1.txt",

n.chains=1, n.iter=10000, n.burnin=1, n.thin=1, DIC=T) # Now the values for the monitored parameters are in the "samples" object,

# ready for inspection.

##############Fancy plots################################################ ##Figure 10.2

alpha <- samples$BUGSoutput$sims.array[,,1] beta <- samples$BUGSoutput$sims.array[,,2] d.beta <- density(beta)

(11)

alpha <- samples$BUGSoutput$sims.array[,,1] beta <- samples$BUGSoutput$sims.array[,,2] d.beta <- density(beta)

layout(matrix(c(1,2,3,0),2,2,byrow=T), width=c(2/3, 1/3), heights=c(2/3,1/3)) #layout.show()

par(mar=c(2,2,1,0))

plot(alpha,beta, xlab="", ylab="", xlim=c(0,1),ylim=c(0,1),axes=F) box(lty=1)

par(mar=c(2,2,1,4))

plot(d.beta$y, d.beta$x, ylim=range(c(0,1)), xlim=rev(range(d.beta$y)), type='l', axes=F, xlab="", ylab="")

axis(4, at=c(0,1))

mtext(expression(beta), side=4,line=1, cex=1.3) box(lty=1)

par(mar=c(6,2,0,0))

plot(density(alpha),zero.line=F, main="", ylab="", xlab="", cex.lab=1.3,xlim=c(0,1), axes=F)

axis(1,at=c(0,1))

mtext(expression(alpha), side=1.2,line=1, cex=1.3) box(lty=1)

(12)

αとβには緩やかな関係性がある。

同時事後分布からわかること

２つの周辺分布よりも多くの情報を持っている

(13)

layout(matrix(c(1:4),2,2,byrow=T)) #layout.show() sc <- 3.5 jj <- numeric() xx <- numeric() for (i in 1:ns) {

plot(-1,100,xlim=c(0,10),ylim=c(0,18), main=(paste("Subject", i)),xlab=("Time Lags"), ylab=("Retention Count"),cex.lab=1.3, axes=F)

axis(1, at=c(1,2,3,4,5,6,7,8,9,10), lab=c("1","2","4","7","12","21","35","59","99","200"),cex.axis=0.7) axis(2, at=c(0,18),lab=c("0","18"),cex.axis=0.7) box(lty=1) for (j in 1:nt) { count <- hist(samples$BUGSoutput$sims.list$predk[,i,j],c(0:n),plot=F) # count <- count$counts # count <- count/sum(count) for (x in 1:n) { if (count$density[x]>0) { points(j,x,pch=22, col="black",cex=sc*sqrt(count$density[x])) if (!is.na(k[i,j]) && k[i,j]==x)

{ points(j,x,pch=22,bg="black",cex=sc*sqrt(count$density[x])) jj <- c(jj,j) xx <- c(xx,x) } } } }

coords <- list(x=jj, y=xx) lines(coords,lwd=2)

jj <- numeric() xx <- numeric() }

(14)

実験参加者の1と３は、事後予測と観測データの当てはまりが悪い個⼈差なしというモデルは不適切

(15)

3-1-2.完全な個⼈差を考える場合

「ベイズ統計で実践モデリング」

第10章記憶の保持

10.2 完全な個⼈差を考える場合

(16)

問題をグラフィカルモデルで表現

θ

_ij

k

_ij

n

β

_i

α

_i

t

_j

j回⽬ i番⽬の⼈

α

_i

~ Beta(1,1)

β

_i

~ Beta(1,1)

θ

_ij

<- min(1, exp(-α

_i

t

)+β

_i

k

_ij

~ Binomial(θ

_ij

,

n

)

(17)

model{ for (i in 1:ns){ for (j in 1:nt){ k[i,j] ~ dbin(theta[i,j],n) predk[i,j] ~ dbin(theta[i,j],n) } } for (i in 1:ns){ for (j in 1:nt){ theta[i,j] <- min(1,exp(-alpha[i]*t[j])+beta[i]) } } for (i in 1:ns){ alpha[i] ~ dbeta(1,1) beta[i] ~ dbeta(1,1) } }

Jagsでのモデルの記述

Retention_2.txt

(18)

# clears workspace: rm(list=ls())

n <- 18

data <- list("k", "n", "t", "ns", "nt") # to be passed on to JAGS myinits <- list(

(19)

# parameters to be monitored:

n.chains=1, n.iter=10000, n.burnin=1, n.thin=1, DIC=T) # Now the values for the monitored parameters are in the "samples" object,

# ready for inspection. ##Figure 10.5

n.iter <- 10000 keepi <- 500

(20)

alpha1 <- samples$BUGSoutput$sims.array[,,1] alpha2 <- samples$BUGSoutput$sims.array[,,2] alpha3 <- samples$BUGSoutput$sims.array[,,3] alpha4 <- samples$BUGSoutput$sims.array[,,4] beta1 <- samples$BUGSoutput$sims.array[,,5] beta2 <- samples$BUGSoutput$sims.array[,,6] beta3 <- samples$BUGSoutput$sims.array[,,7] beta4 <- samples$BUGSoutput$sims.array[,,8] d.beta1 <- density(beta1) d.beta2 <- density(beta2) d.beta3 <- density(beta3) d.beta4 <- density(beta4)

layout(matrix(c(1,2,3,0),2,2,byrow=T), width=c(2/3, 1/3), heights=c(2/3,1/3)) #layout.show()

(21)

plot(alpha1[keep],beta1[keep], xlab="", ylab="", xlim=c(0,1), ylim=c(0,1), axes=F) points(alpha2[keep],beta2[keep], col="red") points(alpha3[keep],beta3[keep], col="green") points(alpha4[keep],beta4[keep],col="blue") box(lty=1) par(mar=c(2,1,1,4))

plot(d.beta1$y, d.beta1$x, ylim=range(c(0,1)), xlim=c(12,0),type='l', axes=F, xlab="", ylab="")

#plot(d.beta1$y, d.beta1$x, ylim=range(c(0,1)), xlim=rev(range(d.beta1$y)),type='l', axes=F, xlab="", ylab="") lines(d.beta2$y, d.beta2$x, col="red")

lines(d.beta3$y, d.beta3$x, col="green") lines(d.beta4$y, d.beta4$x, col="blue") axis(4, at=c(0,1))

par(mar=c(6,2,0,0))

plot(density(alpha1),zero.line=F ,main="", ylab="", xlab="", cex.lab=1.3,xlim=c(0,1), axes=F) lines(density(alpha2), col="red")

lines(density(alpha3), col="green") lines(density(alpha4),col="blue") axis(1,at=c(0,1))

(22)

個⼈差を認めることで

減衰パラメータαは実験参加者によって別々の値をよることが⽰された。

(23)

plot(-1,100,xlim=c(0,10),ylim=c(0,18), main=(paste("Subject", i)),xlab=("Time Lags"), ylab=("Retention Count"),cex.lab=1.3, axes axis(1, at=c(1,2,3,4,5,6,7,8,9,10), lab=c("1","2","4","7","12","21","35","59","99","200"),cex.axis=0.7)

axis(2, at=c(0,18),lab=c("0","18"),cex.axis=0.7) box(lty=1) for (j in 1:nt) { j1 <- i+9 + (j-1)*4 count <- hist(samples$BUGSoutput$sims.list$predk[,i,j],c(0:n),plot=F) # count <- count$counts # count <- count/sum(count) for (x in 1:n){ if (count$density[x]>0){ points(j,x,pch=22, col="black",cex=sc*sqrt(count$density[x])) if (!is.na(k[i,j]) && k[i,j]==x){

points(j,x,pch=22,bg="black",cex=sc*sqrt(count$density[x])) jj <- c(jj,j) xx <- c(xx,x) } } } }

(24)

実験参加者1~3については観測データと予測分布が対応し、モデルの妥当性を⽰している実験参加者4については、利⽤できる情報はαとβの事前分布のみであることから、予測分布には役に⽴つ情報は含まれていない

(25)

3-1-3. 構造化された個⼈差を考える場合

「ベイズ統計で実践モデリング」

第10章記憶の保持

10.3 構造化された個⼈差を考える場合

(26)

問題をグラフィカルモデルで表現

θ

_ij

k

_ij

n

β

_i

α

_i

t

_j

j回⽬ i番⽬の⼈

μ

_α

~Beta(1,1)

λ

_α

~Gammma(.001, .001)

μ

_β

~Beta(1,1)

λ

_β

~Gammma(.001, .001)

α

_i

~ Gaussian(μ

_α

, λ

_α

)

_r(0,1)

β

_i

~ Gaussian(μ

_β

, λ

_β

)

_r(0,1)

θ

_ij

<- min(1, exp(-α

_i

t

)+β

_i

k

_ij

~ Binomial(θ

_ij

,

n

)

μ_α λα μ_β λβ

(27)

model{ for (i in 1:ns){ for (j in 1:nt){ k[i,j] ~ dbin(theta[i,j],n) predk[i,j] ~ dbin(theta[i,j],n) } } for (i in 1:ns){ for (j in 1:nt){ theta[i,j] <- min(1,exp(-alpha[i]*t[j])+beta[i]) } } for (i in 1:ns){ alpha[i] ~ dnorm(alphamu,alphalambda)I(0,1) beta[i] ~ dnorm(betamu,betalambda)I(0,1) } alphamu ~ dbeta(1,1) alphalambda ~ dgamma(.001,.001)I(.001,) alphasigma <- 1/sqrt(alphalambda) betamu ~ dbeta(1,1) betalambda ~ dgamma(.001,.001)I(.001,) betasigma <- 1/sqrt(betalambda) }

Jagsでのモデルの記述

Retention_3.txt

(28)

rm(list=ls())

n <- 18

data <- list("k", "n", "t", "ns", "nt") # to be passed on to JAGS myinits <- list(

list(alphamu = 0.5, alphalambda = 1, betamu = 0.5, betalambda = 1, alpha = rep(0.5,ns), beta = rep(0.1,ns))) # parameters to be monitored:

(29)

n.iter <- 10000 keepi <- 500

keep <- sample(n.iter, keepi)

alpha1 <- samples$BUGSoutput$sims.array[,,1] alpha2 <- samples$BUGSoutput$sims.array[,,2] alpha3 <- samples$BUGSoutput$sims.array[,,3] alpha4 <- samples$BUGSoutput$sims.array[,,4] beta1 <- samples$BUGSoutput$sims.array[,,5] beta2 <- samples$BUGSoutput$sims.array[,,6] beta3 <- samples$BUGSoutput$sims.array[,,7] beta4 <- samples$BUGSoutput$sims.array[,,8] d.beta1 <- density(beta1) d.beta2 <- density(beta2) d.beta3 <- density(beta3) d.beta4 <- density(beta4)

(30)

par(mar=c(2,2,1,0))

plot(alpha1[keep],beta1[keep], xlab="", ylab="", xlim=c(0,1), ylim=c(0,1), axes=F) points(alpha2[keep],beta2[keep], col="red")

points(alpha3[keep],beta3[keep], col="green") points(alpha4[keep],beta4[keep],col="blue") box(lty=1)

par(mar=c(2,1,1,4))

plot(d.beta1$y, d.beta1$x, ylim=range(c(0,1)), xlim=c(12,0),type='l', axes=F, xlab="", ylab="")

#plot(d.beta1$y, d.beta1$x, ylim=range(c(0,1)), xlim=rev(range(d.beta1$y)),type='l', axes=F, xlab="", ylab="") lines(d.beta2$y, d.beta2$x, col="red")

lines(d.beta3$y, d.beta3$x, col="green") lines(d.beta4$y, d.beta4$x, col="blue") axis(4, at=c(0,1))

par(mar=c(6,2,0,0))

plot(density(alpha1),zero.line=F ,main="", ylab="", xlab="", cex.lab=1.3,xlim=c(0,1), axes=F) lines(density(alpha2), col="red")

lines(density(alpha3), col="green") lines(density(alpha4),col="blue") axis(1,at=c(0,1))

(31)

(32)

plot(-1,100,xlim=c(0,10),ylim=c(0,18), main=(paste("Subject", i)),xlab=("Time Lags"), ylab=("Retention Count"),cex.lab=1.3, axes=F)

axis(1, at=c(1,2,3,4,5,6,7,8,9,10), lab=c("1","2","4","7","12","21","35","59","99","200"),cex.axis=0.7) axis(2, at=c(0,18),lab=c("0","18"),cex.axis=0.7) box(lty=1) for (j in 1:nt) { count <- hist(samples$BUGSoutput$sims.list$predk[,i,j],c(0:n),plot=F) # count <- count$counts # count <- count/sum(count) for (x in 1:n){ if (count$density[x]>0){ points(j,x,pch=22, col="black",cex=sc*sqrt(count$density[x])) if (!is.na(k[i,j]) && k[i,j]==x){

points(j,x,pch=22,bg="black",cex=sc*sqrt(count$density[x])) jj <- c(jj,j) xx <- c(xx,x) } } } }

(33)

階層モデルによってグループ全体の構造を記述するガウス分布を指定これによってある実験参加者から学んだことの影響が他の実験参加者に影響を与える実験参加者４についてもパラメータの範囲を絞り込めている

バイオインフォマティクス特論12