機械的に発生させた正規乱数に対してSAMを用いてみたシミュレーション

cnt1 <- rnorm(40000, mean = 200, sd = 1)
cnt2 <- rnorm(40000, mean = 255, sd = 1)
cnt3 <- rnorm(40000, mean = 230, sd = 1)
cnt4 <- rnorm(40000, mean = 344, sd = 1)
treat1 <- rnorm(40000, mean = 250, sd = 1)
treat2 <- rnorm(40000, mean = 210, sd = 1)
treat3 <- rnorm(40000, mean = 240, sd = 1)
treat4 <- rnorm(40000, mean = 210, sd = 1)
data <- cbind(cnt1, cnt2, cnt3, cnt4, treat1,treat2, treat3, treat4)
data[1,]
    cnt1     cnt2     cnt3     cnt4   treat1   treat2   treat3   treat4
199.8450 255.8026 230.7610 343.8670 248.4101 210.0879 239.5559 211.8849

boxplot(data)

par(mfrow = c(2,4))
hist(data[,1], main = "control 1",xlab = "gene expression")
hist(data[,2], main = "control 2",xlab = "gene expression")
hist(data[,3], main = "control 3",xlab = "gene expression")
hist(data[,4], main = "control 4",xlab = "gene expression")
hist(data[,5], main = "treatment 1",xlab = "gene expression")
hist(data[,6], main = "treatment 2",xlab = "gene expression")
hist(data[,7], main = "treatment 3",xlab = "gene expression")
hist(data[,8], main = "treatment 4",xlab = "gene expression")
dev.off()

for (i in 1:8){
data[,i] <- data[,i]/median(data[,i])
}

boxplot(data)

for (i in 1: length(data[,1])){
data[i,] <- data[i,]/median(data[i,])
}

for (i in 1: length(data[,1])){
data[i,] <- log(data[i,], 2)
}


norm <- log(data, 2)



library(siggenes)
n1 <- length(data[1,])/2
n2 <- length(data[1,])/2
cl <- rep(c(0,1), c(n1,n2))

sam.out <- sam(norm, cl, rand = 123)
summary(sam.out)

print(sam.out, seq(1,3,0.1)

plot(sam.out, 1)
plot(sam.out, 0,1)
plot(sam.out, 0.01)

print(sam.out, seq(0.01, 0.1, 0.01))
SAM Analysis for the Two-Class Unpaired Case Assuming Unequal Variances

   Delta    p0  False Called   FDR
1   0.01 0.995 94.714    106 0.889
2   0.02 0.995  2.629      5 0.523
3   0.03 0.995  2.629      5 0.523
4   0.04 0.995      0      0     0
5   0.05 0.995      0      0     0
6   0.06 0.995      0      0     0
7   0.07 0.995      0      0     0
8   0.08 0.995      0      0     0
9   0.09 0.995      0      0     0
10  0.10 0.995      0      0     0

sum.sam.out <- summary(sam.out, 0.03)
sum.sam.out


slotNames(sum.sam.out)
[1] "row.sig.genes" "mat.fdr"       "mat.sig"       "list.args"


significant.genes <- sum.sam.out@mat.sig
> significant.genes[1:5,]
    Row   d.value    stdev         rawp   q.value      R.fold
1   766  2.067911 1.888463 1.285714e-05 0.5142857 23.17078675
2 29588  2.036899 1.519079 2.571429e-05 0.5142857 13.12129065
3 34635 -2.017873 1.497832 4.285714e-05 0.5714286  0.08042155
4  7750 -1.992394 1.562408 9.428571e-05 0.7836735  0.07593873
5 17922 -1.986813 1.423697 1.028571e-04 0.7836735  0.09258940