推荐下载wgca详细讲解Word下载.docx

资源描述

推荐下载wgca详细讲解Word下载.docx

《推荐下载wgca详细讲解Word下载.docx》由会员分享，可在线阅读，更多相关《推荐下载wgca详细讲解Word下载.docx（11页珍藏版）》请在冰豆网上搜索。

推荐下载wgca详细讲解Word下载.docx

#对datExpr0的数据进行goodSamplesGenes测试，看符合标准的基因的返回值

gsg=goodSamplesGenes（datExpr0,verbose=3）;

gsg＄allOK

#如果有不合格的基因或数据，则显示出来，并剔除

if（！

gsg＄allOK）

｛

#显示出要被删除的基因和数据

if（sum（！

gsg$goodGenes）>

0）

printFlush（paste（"

Removinggenes：

，paste（names（datExpr0）[！

gsg$goodGenes］，collapse=”,”）））;

gsg$goodSamples）〉0）

Removingsamples:

”，paste（rownames（datExpr0）[!

gsg＄goodSamples］,collapse="

”）））;

＃去掉不合格的基因和数据

datExpr0=datExpr0［gsg＄goodSamples，gsg＄goodGenes]

｝

＃对数据进行聚类

sampleTree=hclust（dist（datExpr0），method=”average”）;

＃画出聚类树：

打开一个大小为12到9英寸的图形输出窗口

＃如果窗口太大或太小，用户应该更改维度

sizeGrWindow（12，9）

#pdf（文件=”Plots/sampleClustering.pdf"

宽=12,高=9）；

par（cex=0。

6）；

par（mar=c（0,4，2，0））

plot（sampleTree，main="

Sampleclusteringtodetectoutliers”,sub="

xlab="

，cex.lab=1.5，

cex。

axis=1。

5，cex.main=2）

#画一条线切割离群数据

abline（h=15,col="

red"

）；

＃确定下面的集群

clust=cutreeStatic（sampleTree，cutHeight=15,minSize=10）

table（clust）

#1集群里含有需要的数据，把其命名为keepSamples

keepSamples=（clust==1）

＃把dataExpr0中与keepSamples中相同的行定义为datExpr

datExpr=datExpr0［keepSamples，］

nGenes=ncol（datExpr）

nSamples=nrow（datExpr）

＃导入traitData数据

traitData=read.csv（"

ClinicalTraits.csv”）;

dim（traitData）

names（traitData）

＃去掉不需要的性状

allTraits=traitData［,-c（31，16）]；

allTraits=allTraits[，c（2，11:

36）];

dim（allTraits）

names（allTraits）

＃形成一个类似于表达数据的数据框架,这些数据将保持临床特征

#femaleSamples为datExpr的样本名,traitRows为把老鼠特征性状中与femaleSample中样本一样的提取

femaleSamples=rownames（datExpr）；

traitRows=match（femaleSamples,allTraits＄Mice）;

datTraits=allTraits[traitRows，-1]；

rownames（datTraits）=allTraits[traitRows，1］；

collectGarbage（）;

#重新对样本进行聚类

sampleTree2=hclust（dist（datExpr），method="

average"

）

#将特征转换为颜色表示:

白色表示低，红色表示高，灰色表示缺少条目

traitColors=numbers2colors（datTraits，signed=FALSE）；

＃绘制出树状图和下面的颜色。

plotDendroAndColors（sampleTree2，traitColors，

groupLabels=names（datTraits），

main=”Sampledendrogramandtraitheatmap"

＃保存数据datExpr,datTraits

save（datExpr,datTraits,file="

FemaleLiver-01—dataInput。

RData”）

#允许多线程，这有助于加快某些计算的速度。

#警告：

如果您运行RStudio或其他第三方R环境，请跳过这一行。

enableWGCNAThreads（）

#导入第一部分的数据

lnames=load（file=”FemaleLiver-01—dataInput。

RData”）；

lnames

#构建一个权重基因网络需要选择邻接矩阵权重参数

#给出候选的β值

powers=c（c（1:

10），seq（from=12,to=20,by=2））

#调用网络拓扑分析函数

sft=pickSoftThreshold（datExpr,powerVector=powers,verbose=5）

#对结果作图

sizeGrWindow（9，5）

par（mfrow=c（1，2））;

cex1=0.9；

#无标度拓扑拟合指数作为软阈值的函数

plot（sft$fitIndices[，1]，-sign（sft＄fitIndices［，3］）*sft$fitIndices[，2］，

xlab="

SoftThreshold（power）"

ylab="

ScaleFreeTopologyModelFit，signedR^2"

，type="

，

main=paste（"

Scaleindependence"

））；

text（sft＄fitIndices［，1],-sign（sft$fitIndices[，3］）*sft$fitIndices［,2］，

labels=powers，cex=cex1,col="

#在图中高度为0.9的位置画线，这个值对应的是相关系数的平方R^2

abline（h=0.90,col=”red”）

＃平均连通性是软阈值能力的函数

plot（sft$fitIndices[,1］，sft＄fitIndices［，5］，

SoftThreshold（power）”，ylab=”MeanConnectivity"

type=”n”,

main=paste（”Meanconnectivity"

））

text（sft＄fitIndices［，1]，sft$fitIndices［,5]，labels=powers，cex=cex1，col="

red”）

#调用一个函数能够构建基因网络和识别模块

net=blockwiseModules（datExpr，power=6，

TOMType="

unsigned”,minModuleSize=30，

reassignThreshold=0,mergeCutHeight=0.25，

numericLabels=TRUE,pamRespectsDendro=FALSE,

saveTOMs=TRUE,

saveTOMFileBase="

femaleMouseTOM"

verbose=3）

#创建一个图形窗口

sizeGrWindow（12，9）

#将标签转化为绘图颜色

mergedColors=labels2colors（net＄colors）

#绘制树状图和下面的模块颜色

plotDendroAndColors（net$dendrograms［［1］］，mergedColors[net$blockGenes［［1]］],

Modulecolors"

dendroLabels=FALSE,hang=0.03,

addGuide=TRUE，guideHang=0.05）

#保存数据

moduleLabels=net＄colors

moduleColors=labels2colors（net＄colors）

MEs=net＄MEs；

geneTree=net$dendrograms[［1]]；

save（MEs,moduleLabels，moduleColors，geneTree，

file="

FemaleLiver-02—networkConstruction—auto。

RData"

＃=====================================================================================

＃加载第一部分中保存的表达式和特征数据

lnames=load（file=”FemaleLiver-01-dataInput。

#变量的lname包含了加载变量的名称.

#加载在第二部分中保存的网络数据。

lnames=load（file=”FemaleLiver-02—networkConstruction—auto。

RData”）;

＃定义基因和样本的数量

nGenes=ncol（datExpr）;

nSamples=nrow（datExpr）;

#用彩色标签重新计算MEs

MEs0=moduleEigengenes（datExpr,moduleColors）$eigengenes

MEs=orderMEs（MEs0）

moduleTraitCor=cor（MEs，datTraits,use=”p”）；

moduleTraitPvalue=corPvalueStudent（moduleTraitCor，nSamples）；

sizeGrWindow（10,6）

＃将显示相关性及其p值，

textMatrix=paste（signif（moduleTraitCor,2）,"

\n（”，

signif（moduleTraitPvalue，1）,”）”,sep="

dim（textMatrix）=dim（moduleTraitCor）

par（mar=c（6，8.5,3，3））；

＃在一个热图图中显示相关值

labeledHeatmap（Matrix=moduleTraitCor，

xLabels=names（datTraits）,

yLabels=names（MEs）,

ySymbols=names（MEs），

colorLabels=FALSE，

colors=greenWhiteRed（50），

textMatrix=textMatrix,

setStdMargins=FALSE,

text=0.5，

zlim=c（-1，1）,

main=paste（”Module-traitrelationships"

＃定义包含数据特征权重列的变量权重

weight=as。

data。

frame（datTraits$weight_g）；

names（weight）=”weight”

＃模块的名称（颜色）

modNames=substring（names（MEs），3）

#gene和模块的关系

geneModuleMembership=as。

data.frame（cor（datExpr，MEs,use="

p”））;

MMPvalue=as.data。

frame（corPvalueStudent（as.matrix（geneModuleMembership），nSamples））；

names（geneModuleMembership）=paste（"

MM”，modNames,sep="

”）；

names（MMPvalue）=paste（”p。

MM"

modNames,sep=”"

）;

#gene和性状的关系

geneTraitSignificance=as。

data.frame（cor（datExpr,weight，use="

GSPvalue=as.data.frame（corPvalueStudent（as.matrix（geneTraitSignificance），nSamples））;

names（geneTraitSignificance）=paste（"

GS."

names（weight），sep="

names（GSPvalue）=paste（”p。

”）;

module=”brown”

column=match（module，modNames）；

moduleGenes=moduleColors==module;

sizeGrWindow（7,7）;

par（mfrow=c（1，1））;

verboseScatterplot（abs（geneModuleMembership［moduleGenes，column］），

abs（geneTraitSignificance[moduleGenes,1］）,

xlab=paste（"

ModuleMembershipin”,module，”module”）,

ylab="

Genesignificanceforbodyweight”，

main=paste（”Modulemembershipvs.genesignificance\n"

）,

cex.main=1。

2,cex.lab=1。

2，cex.axis=1.2，col=module）

names（datExpr）

names（datExpr）[moduleColors==”brown”]

#把与体重相关的模块和基因注释信息关联

annot=read.csv（file=”GeneAnnotation.csv"

dim（annot）

names（annot）

probes=names（datExpr）

probes2annot=match（probes,annot$substanceBXH）

＃Thefollowingisthenumberorprobeswithoutannotation:

sum（is。

na（probes2annot））

＃Shouldreturn0。

#现在，创建一个数据框架

geneInfo0=data.frame（substanceBXH=probes,

geneSymbol=annot$gene_symbol[probes2annot］,

LocusLinkID=annot$LocusLinkID［probes2annot］,

moduleColor=moduleColors，

geneTraitSignificance，

GSPvalue）

＃按权重对体重进行排序

modOrder=order（—abs（cor（MEs，weight，use="

）））;

#按给定的顺序中添加模块成员信息

for（modin1：

ncol（geneModuleMembership））

{

oldNames=names（geneInfo0）

geneInfo0=data.frame（geneInfo0,geneModuleMembership[，modOrder［mod]],

MMPvalue［,modOrder[mod］］）;

names（geneInfo0）=c（oldNames,paste（”MM.”，modNames［modOrder[mod]］,sep=”"

paste（"

p.MM。

”,modNames［modOrder[mod］],sep=”"

}

＃首先通过模块颜色来排列基因信息变量的基因，然后根据基因的重要性

geneOrder=order（geneInfo0＄moduleColor，-abs（geneInfo0＄GS。

weight））;

geneInfo=geneInfo0[geneOrder,]

write.csv（geneInfo，file="

geneInfo。

csv"

展开阅读全文