$ R
source("http://bioconductor.org/biocLite.R")
biocLite("Agi4x44PreProcess")
source("http://bioconductor.org/biocLite.R")
biocLite("hgug4112a.db")
-----大体の流れ-------
The pre-processing steps implemented in the package are the following:
1.- read the target file
2.- read the array samples
3.- Background correction
and Normalization between samples
4.- Filtering Probes by Quality Flag
5.- Summarizing of Replicated Probes
6.- Creating and ExpressionSet object with the processed data
-----ここまで--------
library(Agi4x44PreProcess)
library(hgug4112a.db)
data(targets)
targets
FileName Treatment GErep Subject Array
Ast Ast.txt st 1 A 1
Bst Bst.txt st 1 B 1
Aunst Aunst.txt unst 2 A 1
Bunst Bunst.txt unst 2 B 1
#自分のデータで行うときは以下のようにする。
targets <- read.targets(infile = "targets.txt")
library(Agi4x44PreProcess)
targets
FileName Treatment GErep Subject Array
A1 A2.txt chemA 1 A 1
B1 B2.txt chemB 1 B 1
A2 A2.txt chemA 2 A 1
B2 B2.txt chemB 2 B 1
#ここまで
dd(dd)
dd
An object of class "RGList"
$R
Ast Bst Aunst Bunst
[1,] 231000.0 2.94e+05 2.97e+05 2.10e+05
[2,] 12.6 8.10e+00 9.87e+00 7.38e+00
[3,] 12.6 8.07e+00 9.82e+00 7.33e+00
[4,] 12.6 8.05e+00 9.79e+00 7.28e+00
[5,] 12.6 8.02e+00 9.75e+00 7.24e+00
12010 more rows ...
$G
Ast Bst Aunst Bunst
[1,] 201000.0 254000.0 255000.0 169000.0
[2,] 50.4 76.5 60.8 66.2
[3,] 54.5 78.6 61.3 57.9
[4,] 54.4 73.6 58.7 53.4
[5,] 53.9 76.4 62.1 56.5
12010 more rows ...
$Rb
Ast Bst Aunst Bunst
[1,] 45 54 53 46.0
[2,] 45 55 55 46.0
[3,] 44 54 54 47.0
[4,] 44 54 54 46.0
[5,] 44 54 53 46.5
12010 more rows ...
$Gb
Ast Bst Aunst Bunst
[1,] 68.9818 77.4722 68.5002 67.3098
[2,] 68.5172 77.4488 68.2706 67.0565
[3,] 67.9893 77.4727 68.0414 66.7981
[4,] 67.5123 77.4587 67.7860 66.5539
[5,] 67.0601 77.4210 67.5620 66.3154
12010 more rows ...
$targets
FileName
Ast Ast.txt
Bst Bst.txt
Aunst Aunst.txt
Bunst Bunst.txt
$genes
Sequence ControlType ProbeName GeneName SystematicName
1 1 GE_BrightCorner GE_BrightCorner GE_BrightCorner
2 1 DarkCorner DarkCorner DarkCorner
3 1 DarkCorner DarkCorner DarkCorner
4 1 DarkCorner DarkCorner DarkCorner
5 1 DarkCorner DarkCorner DarkCorner
Description
1
2
3
4
5
12010 more rows ...
$other
$gIsFound
Ast Bst Aunst Bunst
[1,] 1 1 1 1
[2,] 1 1 1 1
[3,] 1 1 1 1
[4,] 1 1 1 1
[5,] 1 1 1 1
12010 more rows ...
$gIsWellAboveBG
Ast Bst Aunst Bunst
[1,] 1 1 1 1
[2,] 0 0 0 0
[3,] 0 0 0 0
[4,] 0 0 0 0
[5,] 0 0 0 0
12010 more rows ...
$gIsSaturated
Ast Bst Aunst Bunst
[1,] 0 0 0 0
[2,] 0 0 0 0
[3,] 0 0 0 0
[4,] 0 0 0 0
[5,] 0 0 0 0
12010 more rows ...
$gIsFeatNonUnifOL
Ast Bst Aunst Bunst
[1,] 0 0 0 0
[2,] 0 0 0 0
[3,] 0 0 0 0
[4,] 0 0 0 0
[5,] 0 0 0 0
12010 more rows ...
$gIsFeatPopnOL
Ast Bst Aunst Bunst
[1,] 0 0 0 0
[2,] 0 0 0 1
[3,] 0 0 0 0
[4,] 0 0 0 0
[5,] 0 0 0 0
12010 more rows ...
$chr_coord
Ast Bst Aunst Bunst
[1,] " " " " " " " "
[2,] " " " " " " " "
[3,] " " " " " " " "
[4,] " " " " " " " "
[5,] " " " " " " " "
12010 more rows ...
#自分のデータでこれをやるなら
dd <- read.AgilentFE(targets, makePLOT = FALSE) #TRUEだとプロットしてくれるよ☆
#ここまで。
#RGListオブジェクトの各項目の説明は以下。
variable
dd$R:gProcessedSignal
dd$G:gMeanSignal
dd$Rb:gBGMedianSignal
dd$Gb:gBGUsed
dd$targets:File names
dd$genes$ProbeName:Probe Name
dd$genes$GeneName:Gene Name
dd$genes$SystematicName:Systematic Name
dd$genes$Description:Description Name
dd$genes$Sequence:60 bases Sequence
dd$genes$ControlType:FLAG to specify the sort of feature
dd$other$gIsWellAboveBG:FLAG IsWellAboveBG
dd$other$gIsFound:FLAG IsFound
dd$other$gIsSaturated:FLAG IsSaturated
dd$other$gIsFeatPopnOL:FLAG IsFeatPopnOL
dd$other$gIsFeatNonUnifOL:FLAG IsFeatNonUnifOL
dd$other$chr coord:CHR coordinate from Agilent data files
#ここまで
#ddの発現値に関して箱ヒゲ図を描く。ただし、大文字が二つ入っていることに注意。
png("BoxPlot.1.png")
BoxPlot(log2(dd$R), "ProcessedSignal", "red", xlab = "Samples", ylab = "expression")
dev.off()
png("plotDensity.1.png")
plotDensity(log2(dd$R), "ProcessedSignal")
dev.off()
#normalizationします☆
ddNORM = BGandNorm(dd, BGmethod = "half", NORMmethod = "quantile",foreground = "MeanSignal", background = "BGMedianSignal",offset = 50, makePLOTpre = FALSE, makePLOTpost = FALSE)
#TRUEにすると図が描写されるがまだ勉強不足。
ddNORM = BGandNorm(dd, BGmethod = "half", NORMmethod = "quantile",foreground = "MeanSignal", background = "BGMedianSignal",offset = 50, makePLOTpre = T, makePLOTpost = T)
#フラッグによりフィルタリングします☆
ddFILT = filter.probes(ddNORM, control = TRUE, wellaboveBG = TRUE,isfound = TRUE, wellaboveNEG = TRUE, sat = TRUE, PopnOL = TRUE,NonUnifOL = T, nas = TRUE, limWellAbove = 75,limISF = 75,limNEG = 75, limSAT = 75, limPopnOL = 75, limNonUnifOL = 75,limNAS = 100, makePLOT = F, annotation.package = "hgug4112a.db",flag.counts = T, targets)
#まとめをつくります。コントロールプローブを除きます☆
ddPROC = summarize.probe(ddFILT, makePLOT = FALSE, targets)
#非常に多くのメソッドが使うことができるExpressionSetオブジェクトを生成してみる!!!!
esetPROC = build.eset(ddPROC, targets, makePLOT = FALSE, annotation.package = "hgug4112a.db")
class(esetPROC)
[1] "ExpressionSet" #キタァ━゚+.(○・艸)(艸・●)゚+.━!!
attr(,"package")
[1] "Biobase"
png("110208_heatmap.png")
HeatMap(exprs(esetPROC), size = 100, "100 High Var genes")
dev.off()