WGCNA代码

WGCNA

rm(list = ls())

#加载R包

library(WGCNA)

library(tidyr)

library(ggplot2)

library(gridExtra)

setwd("D:\\项目探索\\水稻EDP\\文献\\WGCNA数据结果20190426")

#=================================================================#

#

# 1.导入表达数据

#

#=================================================================#

gene_exp - read.table('D:\\项目探索\\水稻EDP\\文献\\WGCNA数据结果20190426\\DSC.txt',

                         sep = '\t',

                         header = T,

                         stringsAsFactors = F,

                         #第一列作为行名

                         row.names = 1)

gene_exp[is.na(gene_exp)] - 0 # 将NA转换为0

# 根据方差筛选

m.vars=apply(gene_exp,1,var)

expro.upper=gene_exp[which(m.varsquantile(m.vars, probs = seq(0, 1, 0.25))[4]),]

dim(expro.upper)

datExpr - t(expro.upper) # 在WGCNA中需要转置表达矩阵

gsg = goodSamplesGenes(datExpr,verbose = 3)

gsg$allOK

#查看行和列

dim(datExpr)

# 聚类分析是否存在离群值

sampleTree = hclust(dist(datExpr),method = "average")

sizeGrWindow(12,9)

par(cex=0.6)

par(mar = c(0,4,2,0))

plot(sampleTree,

     main = "Sample clustering to detect outliners",

     sub = "",xlab = "",cex.lab=1.5,

     cex.axis=1.5,cex.main = 2)

# 存在离群样本时可以剔除

if(F){abline(h = 15,col = "red")

clust = cutreeStatic(sampleTree,cutHeight = 15,minSize = 10)

table(clust)

keepSamples = (clust==1)

datExpr =datExpr0[keepSamples,]

nGenes = ncol(datExpr)

nSamples = nrow(datExpr)

}

#=================================================================#

#

# 2.寻找最佳β值

#

#=================================================================#

# 选择并构建一个软阈值向量

powers = c(c(1:10),seq(from = 12,to = 20,by=2)

# 执行TOM功能

sft = pickSoftThreshold(datExpr,powerVector = powers,verbose = 5)

# 绘图

sizeGrWindow(9,5)

par(mfrow = c(1,2)) # 一页多图

cex1 = 0.9

# Scale-free topology fit index as a function of the soft-thresholding power

plot(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],

xlab="Soft Threshold(power)",ylab = "Scale Free Topology Model Fit,signed R^2",type="n",

main = paste("Scale independence"))

text(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],labels=powers,cex=cex1,col="red")

abline(h = 0.90,col = "red")

#  Mean connectivity as a function of the soft-thresholding power

plot(sft$fitIndices[,1],sft$fitIndices[,5],xlab="Soft Threshold (power)",ylab ="Mean Connectivity",type = "n",main = paste("Mean connectivity"))

text(sft$fitIndices[,1],sft$fitIndices[,5],labels=powers,cex = cex1,col="red")

#=================================================================#

#

# 3.一步法构建网络模块

#3.1 minModuleSize指定Module所含基因数最少

#3.2 mergeCutHeight指定合并Module的阈值

#3.3 列表net中包含了许多信息

#3.4 可以通过recutBlockwiseTrees函数来修改模块的一些参数,而不用重新计算聚类树

#

#=================================================================#

net = blockwiseModules(datExpr,power = sft$powerEstimate,

TOMType = "unsigned",minModuleSize = 30,reassignThreshold = 0,mergeCutHeight = 0.25,

numericLabels =TRUE,pamRespectsDendro = FALSE,saveTOMs = TRUE,saveTOMFileBase = "RiceTOM",

verbose = 3)

table(net$colors)

# 绘图

sizeGrWindow(12,9)

# 将标签转换为颜色

mergedColors = labels2colors(net$colors)

# 绘制树图及模块颜色图

plotDendroAndColors(net$dendrograms[[1]],mergeColors[net$blockGenes[[1]],"Module colors",dendroLabels = FALSE,hang = 0.03,addGuide = TRUE,guideHang = 0.05)

# 保存相关数据

moduleLabels = net$colors

moduleColors = labels2colors(net$colors)

MEs = net$MEs

geneTree = net$dendrograms[[1:2]] # 可能会存在基因较多,默认将基因分成了两部分

#=================================================================#

# 3.步步法构建

# 3.构建网络

# 3.1.计算相关系数

# 3.2.计算邻接矩阵

# 3.3.计算TOM矩阵

# 3.4.聚类并且划分模块

# 3.5.合并相似模块

#=================================================================#

# 计算邻接矩阵

softpower = sft$powerEstimate

adjacency = adjacency(datExpr,power = softPower)

# 通过邻接矩阵计算TOM矩阵

TOM = TOMsimilarity(adjacency)

dissTOM = 1 - TOM # 此为距离矩阵

# 通过距离矩阵进行聚类分析基因

geneTree = hclust(as.dist(dissTOM),method = "average")

sizeGrWindow(12,9)

plot(geneTree,xlab="",sub="",main="Gene clustering on TOM-based dissimilarity",labels=FALSE,hang = 0.04)

# 设置模块中基因最小数

minModuleSize = 30

# 通过动态树剪切鉴定出模块

dynamicMods = cutreeDynamic(dendro = geneTree,disM = dissTOM,deepSplit = 2,pamRespectsDendro = FALSE,minClusterSize = minModuleSize)

table(dynamicMods)

# 绘图

dynamicColors = labels2colors(dynamicMods)

table(dynamicColors)

sizeGrWindow(8,6)

plotDendroAndColors(geneTree,dynamicColors,"Dynamic Tree Cut",dendroLabels = FALSE,hang = 0.03,addGuide = TRUE,guideHang = 0.05,main = " Gene dendrogram and module colors")

# 合并表达近似的Modules

# 计算Modules的特征基因

MEList = moduleEigengenes(datExpr,color = dynamicColors)

MEs = MEList$eigengenes

# 计算Modules之间的相关性

MEDiss = 1 - cor(MEs)

# 聚类特征基因

METree = hclust(as.dist(MEDiss),method = "average")

# 绘图

sizeGrWindow(7,6)

plot(METree,main = "Clustering of eigengenes",xlab="",sub="")

# 自动合并

MEDissThres = 0.25

abline(h=MEDissThres,col = "red")

merge = mergeCloseModules(datExpr,dynamicColors,cutHeight = MEDissThres,verbose = 3)

mergedColors = merge$colors

mergedMEs = merge$newMEs

# 绘图对比合并前后模块

sizeGrWindow(12,9)

plotDendroAndColors(geneTree,cbind(dynamicColors,mergedColors),c("Dynamic Tree Cut","Merged dynamic"),dendroLabels = FALSE,hang =0.03,addGuide = TRUE,guideHang = 0.05)

# 保存相关的变量数据

moduleColors = mergedColors

# 转换为数字标签

colorOrder = c("grey",standardColors(50))

moduleLabels = match(moduleColors,colorOrder)-1

MEs = mergedMEs

#=================================================================#

#

# 3.大数据集处理

# 基本思想:使用两层聚类

#

#=================================================================#

#  Block-wise network construction and module detection

bwnet - blockwiseModules(datExpr,maxBlockSize = 2000,   # 指定电脑最大可运行的模块

power = sft$powerEstimate,TOMtype ="unsigned",minModuleSize = 30,

reassignThreshold = 0, mergeCutHeight = 0.25,numericLabels = TRUE,

saveTOMs = TRUE,saveTOMFileBase = "RiceTOM-blockwise",

verbose = 3)

bwLabels = matchLabels(bwnet$colors,moduleLabels)

bwModuleColors = labels2colors(bwLabels)

table(bwLabels)

# 绘图

sizeGrWindow(6,6)

# block1绘图

plotDendroAndColors(bwnet$dendrograms[[1]], bwModuleColors[bwnet$blockGenes[[1]]], "Module colors", main = "Gene dendrogram and module colors in block 1", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)

# block2绘图

plotDendroAndColors(bwnet$dendrograms[[2]], bwModuleColors[bwnet$blockGenes[[2]]], "Module colors", main = "Gene dendrogram and module colors in block 2", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)

# 绘图比较single block和block-wise的区别

sizeGrWindow(12,9) 

plotDendroAndColors(geneTree, cbind(moduleColors, bwModuleColors), c("Single block", "2 blocks"), main = "Single block gene dendrogram and module colors", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)

# 通过Eigengenes比较single block和block-wise的区别

singleBlockMEs = moduleEigengenes(datExpr, moduleColors)$eigengenes

blockwiseMEs = moduleEigengenes(datExpr, bwModuleColors)$eigengenes

single2blockwise = match(names(singleBlockMEs), names(blockwiseMEs))

signif(diag(cor(blockwiseMEs[, single2blockwise], singleBlockMEs)), 3)

#=================================================================#

#

# 4.导入性状数据

#

#=================================================================#

traitData = read.csv("ClinicalTraits.csv")

dim(traitData) 

names(traitData)

# 与表达数据进行校准后构建性状数据框

RiceSamples = rownames(datExpr)

traitRows = match(RiceSamples, allTraits$Sample)

datTraits = allTraits[traitRows, -1]

rownames(datTraits) = allTraits[traitRows, 1]

#=================================================================#

#

# 5.模块与性状的相关关系

#

#=================================================================#

# 量化Module-trait的关系

nGenes = ncol(datExpr)

nSamples = nrow(datExpr)

MEs0 = moduleEigengenes(datExpr,moduleColors)$eigengenes

MEs = orderMEs(MEs0)

moduleTraitCor = cor(MEs,datTraits,use = "p")

moduleTraitPvalue = corPvalueStudent(moduleTrsitCor,nSamples)

# 绘图

sizeGrWindow(10,6)

textMatrix = paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "")

dim(textMatrix) = dim(moduleTraitCor)

par(mar = c(6, 8.5, 3, 3))

labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels = names(MEs), ySymbols = names(MEs), colorLabels = FALSE, colors = greenWhiteRed(50), textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.5, zlim = c(-1,1), main = paste("Module-trait relationships"))

#=================================================================#

#

# 6.基因与模块、性状的相关关系

#

#=================================================================#

# 确定基因与模块的关系-MM

modNames = substring(names(MEs),3)  # 提取模块数字标签

geneModuleMembership = as.data.frame(cor(datExpr,MEs,use = "p"))

MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples)) # 通过计算每个Module的Eigengenes与基因的关系

names(geneModuleMembership) = paste("MM", modNames, sep="")

names(MMPvalue) = paste("p.MM", modNames, sep="")

# 提取某个特定的性状

Treatment = as.data.frame(datTraits$Treatment)

names(weight) = "Treatment"

# 确定基因与性状的关系-GS

geneTraitSignificance = as.data.frame(cor(datExpr, Treatment, use = "p")) 

GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples))

names(geneTraitSignificance) = paste("GS.", names(Treatment), sep="")

names(GSPvalue) = paste("p.GS.", names(Treatment), sep="")

#=================================================================#

#

# 7.获取与性状关联度大的特定模块中GS高且MM高的基因

#

#=================================================================#

# 绘制特定模块GS和MM热图

module = "saddlebrown"

column = match(module,modNames)

moduleGenes = moduleColors==module # 获取特定列的数据,只是一种数据提取方法而已

sizeGrWindow(7, 7)

par(mfrow = c(1,1))

verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]), abs(geneTraitSignificance[moduleGenes, 1]), xlab = paste("Module Membership in", module, "module"), ylab = "Gene significance for body weight", main = paste("Module membership vs. gene significance\n"), cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)

# 获取特定模块的特定基因

probes = names(datExpr)[moduleColors==module]

# 输出总体结果数据框

geneInfo0 = data.frame(substanceBXH = probes, geneSymbol = annot$gene_symbol[probes2annot], LocusLinkID = annot$LocusLinkID[probes2annot], moduleColor = moduleColors, geneTraitSignificance, GSPvalue)

# 根据与性状显著相关进行排序

modOrder = order(-abs(cor(MEs, weight, use = "p")))

# 添加模块关系信息

for (mod in 1:ncol(geneModuleMembership)) 

{ oldNames = names(geneInfo0) geneInfo0 = data.frame(geneInfo0, geneModuleMembership[, modOrder[mod]], MMPvalue[, modOrder[mod]]); names(geneInfo0) = c(oldNames, paste("MM.", modNames[modOrder[mod]], sep=""), paste("p.MM.", modNames[modOrder[mod]], sep="")) }

# 第一根据颜色第二根据基因性状显著性进行排序

geneOrder = order(geneInfo0$moduleColor, -abs(geneInfo0$GS.weight))

geneInfo = geneInfo0[geneOrder, ]

#=================================================================#

#

# 8.进行后续分析GO功能注释等

#

#=================================================================#

# 选择感兴趣的Modules

intModules = c("brown", "red", "salmon")

for (module in intModules)

 {

 # Select module probes 

modGenes = (moduleColors==module) 

# Get their entrez ID codes 

modLLIDs = allLLIDs[modGenes]

# Write them into a file 

fileName = paste("LocusLinkIDs-", module, ".txt", sep="")

write.table(as.data.frame(modLLIDs), file = fileName, row.names = FALSE, col.names = FALSE) 

}

go语言中math.Exp2(10)什么意思?也就是说,Exp2(10)对10进行了什么运算?

math.Exp2(10)就是计算2的10次方。

下面是一个例子

package main

import "fmt"

import "math"

func main() {

fmt.Printf("%f\n",

math.Exp2(10))

fmt.Printf("%f\n",

math.Exp2(4))

}

go语言操作符 ^ 和 &^

很多语言都是采用 ~ 作为按位取反运算符,Go 里面采用的是 ^ 。

如果作为二元运算符,^ 表示按位异或,即:对应位相同为 0,相异为 1。

操作符 ^,按位置零,例如:z = x ^ y,表示如果 y 中的 bit 位为 1,则 z 对应 bit 位为 0,否则 z 对应 bit 位等于 x 中相应的 bit 位的值。

对于有符号的整数来说,是按照补码进行取反操作的(快速计算方法:对数 a 取反,结果为 -(a+1) ),对于无符号整数来说就是按位取反

计算过程

以3为例  3在内存中补码为 0*** 0011

取反            1*** 1100

-1操作          1*** 1011

除符号位取反    1*** 0100 结果为-4

-------------------------------------------

以9为例 9在内存中补码为 0*** 1001

取反            1*** 0110

-1操作          1*** 0101

除符号位取反    1*** 1010 结果为-10

-------------------------------------------

以-5为例 -5在内存中为的补码为 1*** 1011

为什么呢

-5源码          1*** 0101

除符号取反      1*** 1010

+1操作          1*** 1011

-------------------------------------------

那么-5取反怎么算

补码 1***1011取反为 0***0100

因为符号位为0,所以是正数了,正数的补码反码源码都是一个,所以是4

===================================

再看-1

-1源码          1*** 0001

除符号取反      1*** 1110

+1操作          1*** 1111

补码 1*** 1111 取反为 0*** 0000

因为符号位为0,所以是正数了,正数的补码反码源码都是一个,所以是0

go语言取反输出的例子看这里

利用go语言实现求数组交集的算法

题目: 给定两个数组,编写一个函数来计算它们的交集.(来自 leecode(349) )

示例 1:

输入:nums1 = [1,2,2,1], nums2 = [2,2] 输出:[2] 示例 2:

输入:nums1 = [4,9,5], nums2 = [9,4,9,8,4] 输出:[9,4]

说明:

我的解法:

题目同上,只不过在输出的时候

输出结果中每个元素出现的次数,应与元素在两个数组中出现的次数一致。

示例 1:

输入:nums1 = [1,2,2,1], nums2 = [2,2] 输出:[2,2] 示例 2:

输入:nums1 = [4,9,5], nums2 = [9,4,9,8,4] 输出:[9,4]

解法

如果给定的数组是排好序的,

arr1 = [1,2,3,4,4,13],arr2 = [1,2,3,9,10]

那这个返回值该如何获取得两个数组的交集呢?

解法